diff --git a/pybandits/cmab.py b/pybandits/cmab.py index d26a9b9..1beab7b 100644 --- a/pybandits/cmab.py +++ b/pybandits/cmab.py @@ -63,7 +63,8 @@ class BaseCmabBernoulli(BaseMab): predict_with_proba: bool predict_actions_randomly: bool - @field_validator("actions") + @field_validator("actions", mode="after") + @classmethod def check_bayesian_logistic_regression_models_len(cls, v): blr_betas_len = [len(b.betas) for b in v.values()] if not all(blr_betas_len[0] == x for x in blr_betas_len): @@ -329,6 +330,7 @@ def create_cmab_bernoulli_cold_start( n_features: PositiveInt, epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None, + fast_inference: bool = False, ) -> CmabBernoulli: """ Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, with default @@ -347,6 +349,8 @@ def create_cmab_bernoulli_cold_start( default_action: Optional[ActionId] The default action to select with a probability of epsilon when using the epsilon-greedy approach. If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. + fast_inference: bool, defaults to False + Whether to utilize MCMC (False) or variational inference (True) for the Bayesian inference on update Returns ------- @@ -354,8 +358,10 @@ def create_cmab_bernoulli_cold_start( Contextual Multi-Armed Bandit with strategy = ClassicBandit """ actions = {} - for a in set(action_ids): - actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features) + for action_id in set(action_ids): + actions[action_id] = create_bayesian_logistic_regression_cold_start( + n_betas=n_features, fast_inference=fast_inference + ) mab = CmabBernoulli(actions=actions, epsilon=epsilon, default_action=default_action) mab.predict_actions_randomly = True return mab @@ -368,6 +374,7 @@ def create_cmab_bernoulli_bai_cold_start( exploit_p: Optional[Float01] = None, epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None, + fast_inference: bool = False, ) -> CmabBernoulliBAI: """ Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action @@ -395,6 +402,9 @@ def create_cmab_bernoulli_bai_cold_start( default_action: Optional[ActionId] The default action to select with a probability of epsilon when using the epsilon-greedy approach. If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. + fast_inference: bool, defaults to False + Whether to utilize standard MCMC (False) or faster variational inference (True) + for the Bayesian inference on update steps. Returns ------- @@ -403,7 +413,7 @@ def create_cmab_bernoulli_bai_cold_start( """ actions = {} for a in set(action_ids): - actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features) + actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference) mab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p, epsilon=epsilon, default_action=default_action) mab.predict_actions_randomly = True return mab @@ -416,6 +426,7 @@ def create_cmab_bernoulli_cc_cold_start( subsidy_factor: Optional[Float01] = None, epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None, + fast_inference: bool = False, ) -> CmabBernoulliCC: """ Utility function to create a Stochastic Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control @@ -449,6 +460,9 @@ def create_cmab_bernoulli_cc_cold_start( default_action: Optional[ActionId] The default action to select with a probability of epsilon when using the epsilon-greedy approach. If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. + fast_inference: bool, defaults to False + Whether to utilize standard MCMC (False) or faster variational inference (True) + for the Bayesian inference on update steps. Returns ------- @@ -457,7 +471,9 @@ def create_cmab_bernoulli_cc_cold_start( """ actions = {} for a, cost in action_ids_cost.items(): - actions[a] = create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost) + actions[a] = create_bayesian_logistic_regression_cc_cold_start( + n_betas=n_features, cost=cost, fast_inference=fast_inference + ) mab = CmabBernoulliCC( actions=actions, subsidy_factor=subsidy_factor, epsilon=epsilon, default_action=default_action ) diff --git a/pybandits/model.py b/pybandits/model.py index c94ba1f..3de91b9 100644 --- a/pybandits/model.py +++ b/pybandits/model.py @@ -22,9 +22,11 @@ from random import betavariate -from typing import List, Tuple +from typing import List, Optional, Tuple, Union -from numpy import array, c_, exp, insert, mean, multiply, ones, sqrt, std +import numpy as np +import pymc.math as pmath +from numpy import array, c_, insert, mean, multiply, ones, sqrt, std from numpy.typing import ArrayLike from pydantic import ( Field, @@ -34,11 +36,10 @@ model_validator, validate_call, ) -from pymc import Bernoulli, Data, Deterministic, sample +from pymc import Bernoulli, Data, Deterministic, fit, sample from pymc import Model as PymcModel from pymc import StudentT as PymcStudentT -from pymc.math import sigmoid -from pytensor.tensor import dot +from pytensor.tensor import TensorVariable, dot from scipy.stats import t from pybandits.base import BinaryReward, Model, Probability, PyBanditsBaseModel @@ -231,16 +232,62 @@ class BaseBayesianLogisticRegression(Model): Parameters ---------- - alpha: StudentT + alpha : StudentT Student's t-distribution of the alpha coefficient. - betas: StudentT + betas : StudentT Student's t-distributions of the betas coefficients. - params_sample: Dict - Parameters for the function pymc.sample() + fast_inference : bool, defaults to False + Whether to utilize standard MCMC (False) or faster variational inference (True) + for the Bayesian inference on update steps. + update_kwargs : Optional[dict], uses default values if not specified + Additional arguments to pass to the update method. """ alpha: StudentT betas: List[StudentT] = Field(..., min_items=1) + fast_inference: bool = False + update_kwargs: Optional[dict] = None + _default_update_kwargs = dict(draws=1000, progressbar=False, return_inferencedata=False) + _default_mcmc_kwargs = dict( + tune=500, + draws=1000, + chains=2, + init="adapt_diag", + cores=1, + target_accept=0.95, + progressbar=False, + return_inferencedata=False, + ) + _default_variational_inference_kwargs = dict(method="advi") + + @model_validator(mode="after") + def arrange_update_kwargs(self): + if self.update_kwargs is None: + self.update_kwargs = self._default_update_kwargs + if self.fast_inference: + self.update_kwargs = {**self._default_variational_inference_kwargs, **self.update_kwargs} + else: + self.update_kwargs = {**self._default_mcmc_kwargs, **self.update_kwargs} + return self + + @classmethod + def _stable_sigmoid(cls, x: Union[np.ndarray, TensorVariable]) -> Union[np.ndarray, TensorVariable]: + """ + Vectorized sigmoid function that avoids overflow and underflow. + Compatible with both numpy and PyMC3 tensors. + Parameters + ---------- + x : Union[np.ndarray, TensorVariable] + Input values. + + Returns + ------- + prob : Union[np.ndarray, TensorVariable] + Sigmoid function applied to the input values. + """ + backend = np if isinstance(x, np.ndarray) else pmath + prob = backend.where(x >= 0, 1 / (1 + backend.exp(-x)), backend.exp(x) / (1 + backend.exp(x))) + return prob @validate_call(config=dict(arbitrary_types_allowed=True)) def check_context_matrix(self, context: ArrayLike): @@ -249,12 +296,12 @@ def check_context_matrix(self, context: ArrayLike): Parameters ---------- - context: ArrayLike of shape (n_samples, n_features) + context : ArrayLike of shape (n_samples, n_features) Matrix of contextual features. Returns ------- - context: pandas DataFrame of shape (n_samples, n_features) + context : pandas DataFrame of shape (n_samples, n_features) Matrix of contextual features. """ try: @@ -304,25 +351,12 @@ def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]: weighted_sum = multiply(context_ext, coeff.T).sum(axis=1) # compute the probability with the sigmoid function - prob = 1.0 / (1.0 + exp(-weighted_sum)) + prob = self._stable_sigmoid(weighted_sum) return prob, weighted_sum @validate_call(config=dict(arbitrary_types_allowed=True)) - def update( - self, - context: ArrayLike, - rewards: List[BinaryReward], - tune=500, - draws=1000, - chains=2, - init="adapt_diag", - cores=2, - target_accept=0.95, - progressbar=False, - return_inferencedata=False, - **kwargs, - ): + def update(self, context: ArrayLike, rewards: List[BinaryReward]): """ Update the model parameters. @@ -344,40 +378,39 @@ def update( # if model was never updated priors_parameters = default arguments # else priors_parameters are calculated from traces of the previous update alpha = PymcStudentT("alpha", mu=self.alpha.mu, sigma=self.alpha.sigma, nu=self.alpha.nu) - betas = [ - PymcStudentT("beta" + str(i), mu=self.betas[i].mu, sigma=self.betas[i].sigma, nu=self.betas[i].nu) - for i in range(len(self.betas)) - ] + beta_mu = [b.mu for b in self.betas] + beta_sigma = [b.sigma for b in self.betas] + beta_nu = [b.nu for b in self.betas] + betas = PymcStudentT("betas", mu=beta_mu, sigma=beta_sigma, nu=beta_nu, shape=len(self.betas)) - context = Data("context", context) - rewards = Data("rewards", rewards) + context = Data("context", context, mutable=False) + rewards = Data("rewards", rewards, mutable=False) # Likelihood (sampling distribution) of observations weighted_sum = Deterministic("weighted_sum", alpha + dot(betas, context.T)) - p = Deterministic("p", sigmoid(weighted_sum)) + p = Deterministic("p", self._stable_sigmoid(weighted_sum)) # Bernoulli random vector with probability of success given by sigmoid function and actual data as observed _ = Bernoulli("likelihood", p=p, observed=rewards) # update traces object by sampling from posterior distribution - trace = sample( - tune=tune, - draws=draws, - chains=chains, - init=init, - cores=cores, - target_accept=target_accept, - progressbar=progressbar, - return_inferencedata=return_inferencedata, - **kwargs, - ) + if self.fast_inference: + # variational inference + update_kwargs = self.update_kwargs.copy() + approx = fit(method=update_kwargs.pop("method")) + trace = approx.sample(**update_kwargs) + else: + # MCMC + trace = sample(**self.update_kwargs) # compute mean and std of the coefficients distributions self.alpha.mu = mean(trace["alpha"]) self.alpha.sigma = std(trace["alpha"], ddof=1) - for i in range(len(self.betas)): - self.betas[i].mu = mean(trace["beta" + str(i)]) - self.betas[i].sigma = std(trace["beta" + str(i)], ddof=1) + betas_mu = mean(trace["betas"], axis=0) + betas_std = std(trace["betas"], axis=0, ddof=1) + self.betas = [ + StudentT(mu=mu, sigma=sigma, nu=beta.nu) for mu, sigma, beta in zip(betas_mu, betas_std, self.betas) + ] class BayesianLogisticRegression(BaseBayesianLogisticRegression): @@ -392,12 +425,15 @@ class BayesianLogisticRegression(BaseBayesianLogisticRegression): Parameters ---------- - alpha: StudentT + alpha : StudentT Student's t-distribution of the alpha coefficient. - betas: StudentT + betas : StudentT Student's t-distributions of the betas coefficients. - params_sample: Dict - Parameters for the function pymc.sample() + fast_inference : bool, defaults to False + Whether to utilize standard MCMC (False) or faster variational inference (True) + for the Bayesian inference on update steps. + update_kwargs: Optional[dict], uses default values if not specified + Additional arguments to pass to the update method. """ @@ -417,8 +453,11 @@ class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression): Student's t-distribution of the alpha coefficient. betas: StudentT Student's t-distributions of the betas coefficients. - params_sample: Dict - Parameters for the function pymc.sample() + fast_inference : bool, defaults to False + Whether to utilize standard MCMC (False) or faster variational inference (True) + for the Bayesian inference on update steps. + update_kwargs : Optional[dict], uses default values if not specified + Additional arguments to pass to the update method. cost: NonNegativeFloat Cost associated to the Bayesian Logistic Regression model. """ @@ -426,7 +465,9 @@ class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression): cost: NonNegativeFloat -def create_bayesian_logistic_regression_cold_start(n_betas: PositiveInt) -> BayesianLogisticRegression: +def create_bayesian_logistic_regression_cold_start( + n_betas: PositiveInt, fast_inference: bool = False, update_kwargs: Optional[dict] = None +) -> BayesianLogisticRegression: """ Utility function to create a Bayesian Logistic Regression model, with default parameters. @@ -441,17 +482,27 @@ def create_bayesian_logistic_regression_cold_start(n_betas: PositiveInt) -> Baye n_betas : PositiveInt The number of betas of the Bayesian Logistic Regression model. This is also the number of features expected after in the context matrix. + fast_inference : bool, defaults to False + Whether to utilize standard MCMC (False) or faster variational inference (True) + for the Bayesian inference on update steps. + update_kwargs : Optional[dict], uses default values if not specified + Additional arguments to pass to the update method. Returns ------- blr: BayesianLogisticRegression The Bayesian Logistic Regression model. """ - return BayesianLogisticRegression(alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)]) + return BayesianLogisticRegression( + alpha=StudentT(), + betas=[StudentT() for _ in range(n_betas)], + fast_inference=fast_inference, + update_kwargs=update_kwargs, + ) def create_bayesian_logistic_regression_cc_cold_start( - n_betas: PositiveInt, cost: NonNegativeFloat + n_betas: PositiveInt, cost: NonNegativeFloat, fast_inference: bool = False, update_kwargs: Optional[dict] = None ) -> BayesianLogisticRegressionCC: """ Utility function to create a Bayesian Logistic Regression model with cost control, with default parameters. @@ -469,10 +520,21 @@ def create_bayesian_logistic_regression_cc_cold_start( after in the context matrix. cost: NonNegativeFloat Cost associated to the Bayesian Logistic Regression model. + fast_inference : bool, defaults to False + Whether to utilize standard MCMC (False) or faster variational inference (True) + for the Bayesian inference on update steps. + update_kwargs : Optional[dict], uses default values if not specified + Additional arguments to pass to the update method. Returns ------- blr: BayesianLogisticRegressionCC The Bayesian Logistic Regression model. """ - return BayesianLogisticRegressionCC(alpha=StudentT(), betas=[StudentT() for _ in range(n_betas)], cost=cost) + return BayesianLogisticRegressionCC( + alpha=StudentT(), + betas=[StudentT() for _ in range(n_betas)], + cost=cost, + fast_inference=fast_inference, + update_kwargs=update_kwargs, + ) diff --git a/pyproject.toml b/pyproject.toml index 2304681..912d75d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pybandits" -version = "0.4.1" +version = "0.5.0" description = "Python Multi-Armed Bandit Library" authors = [ "Dario d'Andrea ", diff --git a/tests/test_cmab.py b/tests/test_cmab.py index 5fe15e4..e434ffd 100644 --- a/tests/test_cmab.py +++ b/tests/test_cmab.py @@ -148,18 +148,30 @@ def test_cmab_init_with_wrong_blr_models(a, b): ) -def test_cmab_update(n_samples=100, n_features=3): +@settings(deadline=60000) +@given(st.just(100), st.just(3), st.booleans()) +def test_cmab_update(n_samples, n_features, fast_inference): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() def run_update(context): - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = create_cmab_bernoulli_cold_start( + action_ids={"a1", "a2"}, n_features=n_features, fast_inference=fast_inference + ) assert all( - [mab.actions[a] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] + [ + mab.actions[a] + == create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference) + for a in set(actions) + ] ) mab.update(context=context, actions=actions, rewards=rewards) assert all( - [mab.actions[a] != create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] + [ + mab.actions[a] + != create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference) + for a in set(actions) + ] ) assert not mab.predict_actions_randomly @@ -179,26 +191,40 @@ def run_update(context): run_update(context=context) -def test_cmab_update_not_all_actions(n_samples=100, n_feat=3): +@settings(deadline=10000) +@given(st.just(100), st.just(3), st.booleans()) +def test_cmab_update_not_all_actions(n_samples, n_feat, fast_inference): actions = np.random.choice(["a3", "a4"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_feat)) - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat) + mab = create_cmab_bernoulli_cold_start( + action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat, fast_inference=fast_inference + ) mab.update(context=context, actions=actions, rewards=rewards) - assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a3"] != create_bayesian_logistic_regression_cold_start(n_betas=n_feat) - assert mab.actions["a4"] != create_bayesian_logistic_regression_cold_start(n_betas=n_feat) + assert mab.actions["a1"] == create_bayesian_logistic_regression_cold_start( + n_betas=n_feat, fast_inference=fast_inference + ) + assert mab.actions["a2"] == create_bayesian_logistic_regression_cold_start( + n_betas=n_feat, fast_inference=fast_inference + ) + assert mab.actions["a3"] != create_bayesian_logistic_regression_cold_start( + n_betas=n_feat, fast_inference=fast_inference + ) + assert mab.actions["a4"] != create_bayesian_logistic_regression_cold_start( + n_betas=n_feat, fast_inference=fast_inference + ) @settings(deadline=500) -@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) -def test_cmab_update_shape_mismatch(n_samples, n_features): +@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100), st.booleans()) +def test_cmab_update_shape_mismatch(n_samples, n_features, fast_inference): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = create_cmab_bernoulli_cold_start( + action_ids={"a1", "a2"}, n_features=n_features, fast_inference=fast_inference + ) with pytest.raises(AttributeError): # actions shape mismatch mab.update(context=context, actions=actions[1:], rewards=rewards) @@ -369,6 +395,7 @@ def test_cmab_get_state(mu, sigma, n_features): min_size=3, max_size=3, ), + "fast_inference": st.booleans(), }, ), min_size=2, @@ -381,8 +408,8 @@ def test_cmab_from_state(state): cmab = CmabBernoulli.from_state(state) assert isinstance(cmab, CmabBernoulli) - expected_actions = state["actions"] actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict + expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} assert expected_actions == actual_actions # Ensure get_state and from_state compatibility @@ -513,18 +540,30 @@ def test_cmab_bai_predict(n_samples, n_features): assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples -def test_cmab_bai_update(n_samples=100, n_features=3): +@settings(deadline=10000) +@given(st.just(100), st.just(3), st.booleans()) +def test_cmab_bai_update(n_samples, n_features, fast_inference): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_bai_cold_start(action_ids={"a1", "a2"}, n_features=n_features) + mab = create_cmab_bernoulli_bai_cold_start( + action_ids={"a1", "a2"}, n_features=n_features, fast_inference=fast_inference + ) assert mab.predict_actions_randomly assert all( - [mab.actions[a] == create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] + [ + mab.actions[a] + == create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference) + for a in set(actions) + ] ) mab.update(context=context, actions=actions, rewards=rewards) assert all( - [mab.actions[a] != create_bayesian_logistic_regression_cold_start(n_betas=n_features) for a in set(actions)] + [ + mab.actions[a] + != create_bayesian_logistic_regression_cold_start(n_betas=n_features, fast_inference=fast_inference) + for a in set(actions) + ] ) assert not mab.predict_actions_randomly @@ -587,6 +626,7 @@ def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01): min_size=3, max_size=3, ), + "fast_inference": st.booleans(), }, ), min_size=2, @@ -603,9 +643,10 @@ def test_cmab_bai_from_state(state): cmab = CmabBernoulliBAI.from_state(state) assert isinstance(cmab, CmabBernoulliBAI) - expected_actions = state["actions"] actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict + expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} assert expected_actions == actual_actions + expected_exploit_p = ( state["strategy"].get("exploit_p", 0.5) if state["strategy"].get("exploit_p") is not None else 0.5 ) # Covers both not existing and existing + None @@ -743,22 +784,32 @@ def test_cmab_cc_predict(n_samples, n_features): assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples -def test_cmab_cc_update(n_samples=100, n_features=3): +@settings(deadline=10000) +@given(st.just(100), st.just(3), st.booleans()) +def test_cmab_cc_update(n_samples, n_features, fast_inference): actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() rewards = np.random.choice([0, 1], size=n_samples).tolist() context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = create_cmab_bernoulli_cc_cold_start(action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features) + mab = create_cmab_bernoulli_cc_cold_start( + action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features, fast_inference=fast_inference + ) assert mab.predict_actions_randomly assert all( [ - mab.actions[a] == create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + mab.actions[a] + == create_bayesian_logistic_regression_cc_cold_start( + n_betas=n_features, cost=10, fast_inference=fast_inference + ) for a in set(actions) ] ) mab.update(context=context, actions=actions, rewards=rewards) assert all( [ - mab.actions[a] != create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=10) + mab.actions[a] + != create_bayesian_logistic_regression_cc_cold_start( + n_betas=n_features, cost=10, fast_inference=fast_inference + ) for a in set(actions) ] ) @@ -830,6 +881,7 @@ def test_cmab_cc_get_state( max_size=3, ), "cost": st.floats(min_value=0), + "fast_inference": st.booleans(), }, ), min_size=2, @@ -846,9 +898,10 @@ def test_cmab_cc_from_state(state): cmab = CmabBernoulliCC.from_state(state) assert isinstance(cmab, CmabBernoulliCC) - expected_actions = state["actions"] actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict + expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} assert expected_actions == actual_actions + expected_subsidy_factor = ( state["strategy"].get("subsidy_factor", 0.5) if state["strategy"].get("subsidy_factor") is not None else 0.5 ) # Covers both not existing and existing + None diff --git a/tests/test_execution_time.py b/tests/test_execution_time.py deleted file mode 100644 index 647d6ac..0000000 --- a/tests/test_execution_time.py +++ /dev/null @@ -1,316 +0,0 @@ -# # MIT License -# # -# # Copyright (c) 2022 Playtika Ltd. -# # -# # Permission is hereby granted, free of charge, to any person obtaining a copy -# # of this software and associated documentation files (the "Software"), to deal -# # in the Software without restriction, including without limitation the rights -# # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# # copies of the Software, and to permit persons to whom the Software is -# # furnished to do so, subject to the following conditions: -# # -# # The above copyright notice and this permission notice shall be included in all -# # copies or substantial portions of the Software. -# # -# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# # SOFTWARE. - -# import time - -# import numpy as np - -# from pybandits.core.cmab import Cmab - -# verbose = True - - -# def run_cmab_predict_streaming(n_actions, n_features, n_samples, n_iterations, n_jobs, sampling, params_sample, -# verbose=False): -# """ -# This function executes the following steps: -# - initialize cmab with input params -# - simulate first batch of users with actions and rewards -# - update cmab with first batch -# - predict 1 sample at time (i.e. in streaming) with sampling (sampling=True) or without (sampling=False) -# - return the mean and std of the prediction time. -# """ - -# # params -# size_first_batch = 1000 -# actions_ids = ['action' + str(i + 1) for i in range(n_actions)] - -# # init model -# cmab = Cmab(n_features=n_features, actions_ids=actions_ids, n_jobs=n_jobs, params_sample=params_sample) - -# # simulate first batch -# X = 2 * np.random.random_sample((size_first_batch, n_features)) - 1 # float in the interval (-1, 1) -# actions, _ = cmab.predict(X) -# rewards = np.random.randint(2, size=size_first_batch) - -# # update -# start = time.time() -# cmab.update(X=X, actions=actions, rewards=rewards) -# end = time.time() -# t = end - start -# if verbose: -# print('\nUpdate with n_actions = {}, n_features = {}, size_first_batch = {}. Time = {:.6f} sec.' -# .format(n_actions, n_features, size_first_batch, t)) - -# # predict 1 sample at time -# t = [] -# for i in range(n_iterations): -# x = 2 * np.random.random_sample((n_samples, n_features)) - 1 # floats in the interval (-1, 1) -# if sampling: -# start = time.time() -# _, _ = cmab.predict(x) -# end = time.time() -# else: -# start = time.time() -# _, _ = cmab.fast_predict(x) -# end = time.time() -# t.append(end-start) -# mu_t, simga_t = np.mean(t), np.std(t) - -# if verbose: -# print('Predict of n_actions={}, n_features={}, n_samples={}, n_iterations={}, sampling={}. ' -# '\nmean execution time = {:.6f} sec, std execution time = {:.6f} sec ' -# .format(n_actions, n_features, n_samples, n_iterations, sampling, mu_t, simga_t)) - -# return mu_t, simga_t - - -# def test_cmab_time_predict_before_update(): -# """ Test cmab.predict() in steaming before the first update(). """ -# # input -# n_iteration = 10000 -# n_actions = 1000 -# n_samples = 1 -# n_features = 1000 -# actions_ids = ['action' + str(i + 1) for i in range(n_actions)] -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # init model -# cmab = Cmab(n_features=n_features, actions_ids=actions_ids, params_sample=params_sample) - -# # predict -# t = [] -# for i in range(n_iteration): -# x = 2 * np.random.random_sample((n_samples, n_features)) - 1 # float in the interval (-1, 1) -# start = time.time() -# _, _ = cmab.predict(x) -# end = time.time() -# t.append(end - start) -# mu_t, simga_t = np.mean(t), np.std(t) - -# if verbose: -# print('\nPredict before the first update of n_samples={}, n_actions={}, n_features={}, n_iteration={}' -# '\nmean execution time = {:.6f} sec, std execution time = {:.6f} sec ' -# .format(n_samples, n_iteration, n_actions, n_features, mu_t, simga_t)) - - -# # test with fast predict - -# def test_cmab_time_predict_2_2_1_fp(): -# """ Test cmab.fast_predict() in steaming after the first update(). """ -# # input -# n_actions = 2 -# n_features = 2 -# n_samples = 1 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = False -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_2_2_10000_fp(): -# """ Test cmab.fast_predict() in steaming after the first update(). """ -# # input -# n_actions = 2 -# n_features = 2 -# n_samples = 10000 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = False -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_2_5_10000_fp(): -# """ Test cmab.fast_predict() in steaming after the first update(). """ -# # input -# n_actions = 2 -# n_features = 5 -# n_samples = 10000 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = False -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_2_100_10000_fp(): -# """ Test cmab.fast_predict() in steaming after the first update(). """ -# # input -# n_actions = 2 -# n_features = 100 -# n_samples = 10000 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = False -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_5_2_10000_fp(): -# """ Test cmab.fast_predict() in steaming after the first update(). """ -# # input -# n_actions = 5 -# n_features = 2 -# n_samples = 10000 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = False -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_20_2_1_fp(): -# """ Test cmab.fast_predict() in steaming after the first update(). """ -# # input -# n_actions = 20 -# n_features = 2 -# n_samples = 1 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = False -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_20_100_1_fp(): -# """ Test cmab.fast_predict() in steaming after the first update(). """ -# # input -# n_actions = 20 -# n_features = 100 -# n_samples = 1 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = False -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# # test with sampling - -# def test_cmab_time_predict_2_2_1_w_s(): -# """ Test cmab.predict() in steaming after the first update(). """ -# # input -# n_actions = 2 -# n_features = 2 -# n_samples = 1 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = True -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_2_2_10000_w_s(): -# """ Test cmab.predict() in steaming after the first update(). """ -# # input -# n_actions = 2 -# n_features = 2 -# n_samples = 10000 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = True -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_2_5_10000_w_s(): -# """ Test cmab.predict() in steaming after the first update(). """ -# # input -# n_actions = 2 -# n_features = 5 -# n_samples = 10000 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = True -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose) - - -# def test_cmab_time_predict_2_100_10000_w_s(): -# """ Test cmab.predict() in steaming after the first update(). """ -# # input -# n_actions = 2 -# n_features = 100 -# n_samples = 10000 -# n_iterations = 10 -# n_jobs = n_actions -# sampling = True -# params_sample = {'tune': 500, 'draws': 1000, 'chains': 2, 'init': 'adapt_diag', 'cores': 1, 'target_accept': 0.95, -# 'progressbar': False} - -# # run test -# mu_t, simga_t = run_cmab_predict_streaming(n_actions=n_actions, n_features=n_features, n_samples=n_samples, -# n_iterations=n_iterations, n_jobs=n_jobs, sampling=sampling, -# params_sample=params_sample, verbose=verbose)