Skip to content

Commit af9c8eb

Browse files
committed
Epsilon Greedy Strategy Wrapper
Change log: 1. Added pytest_mock to pyproject.toml for mocking in tests. 2. Added _make_epsilon_greedy as a static method of Strategy in base.py; the method wraps the strategy's native select_action with an epsilon-greedy approach. 3. Added epsilon and default_action to all smab.py and cmab.py classes and cold-start methods. 4. Added a test suite for the epsilon-greedy functionality.
1 parent b3d295b commit af9c8eb

File tree

5 files changed

+419
-30
lines changed

5 files changed

+419
-30
lines changed

pybandits/base.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@
2222

2323

2424
from abc import ABC, abstractmethod
25-
from typing import Any, Dict, List, NewType, Optional, Set, Tuple, Union
25+
from typing import Any, Dict, List, NewType, Optional, Set, Tuple, Union, Callable
2626

27+
import numpy as np
2728
from pydantic import (
2829
BaseModel,
2930
Extra,
@@ -33,8 +34,10 @@
3334
constr,
3435
validate_arguments,
3536
validator,
37+
root_validator,
3638
)
3739

40+
3841
ActionId = NewType("ActionId", constr(min_length=1))
3942
Float01 = NewType("Float_0_1", confloat(ge=0, le=1))
4043
Probability = NewType("Probability", Float01)
@@ -74,6 +77,47 @@ class Strategy(PyBanditsBaseModel, ABC):
7477
Strategy to select actions in multi-armed bandits.
7578
"""
7679

80+
epsilon: Optional[Float01]
81+
default_action: Optional[ActionId]
82+
83+
def __init__(self, **data: Any) -> None:
    """
    Build the strategy and, when an epsilon is configured, wrap ``select_action`` with epsilon-greedy.

    Parameters
    ----------
    data: Any
        Keyword arguments forwarded unchanged to the parent initializer.
    """
    super().__init__(**data)
    if self.epsilon is None:
        # No epsilon configured: keep the strategy's native select_action untouched.
        return
    # NOTE(review): this rebinds select_action on the instance; whether the base model
    # permits assigning a non-field attribute depends on its config, which is not
    # visible here -- confirm.
    greedy_select = self._make_epsilon_greedy(self.epsilon, self.default_action, self.select_action)
    self.select_action = greedy_select
87+
88+
@staticmethod
@validate_arguments
def _make_epsilon_greedy(
    epsilon: Float01, default_action: Optional[ActionId], select_action: Callable
) -> Callable:
    """
    Wraps a select_action function with epsilon-greedy strategy.

    Parameters
    ----------
    epsilon: Float_0_1
        Number in [0, 1] which specifies the probability of selecting the default_action
        (or a random action when default_action is None).
    default_action: Optional[ActionId]
        The default action to be selected with probability epsilon. None for random action.
    select_action: Callable
        The function to be wrapped. It is called with keyword arguments ``p`` and ``actions``.

    Returns
    -------
    select_epsilon_greedy_action: Callable
        The wrapped function.
    """

    def select_epsilon_greedy_action(
        p: Dict[ActionId, float],
        actions: Optional[Dict[ActionId, Model]] = None,
    ) -> ActionId:
        """
        Select the default/random action with probability epsilon, otherwise delegate.

        Raises
        ------
        KeyError
            If a default action is configured but is not a key of ``p``.
        """
        # Checked on every call, because the set of candidate actions is only known here.
        if default_action is not None and default_action not in p:
            raise KeyError(f"Default action {default_action} not in actions.")
        # A single Bernoulli draw: 1 with probability epsilon.
        if np.random.binomial(1, epsilon):
            if default_action is not None:
                return default_action
            # Index a plain list so the returned key keeps its original Python type;
            # np.random.choice would convert the keys to an array and return a numpy scalar.
            candidates = list(p)
            return candidates[np.random.randint(len(candidates))]
        return select_action(p=p, actions=actions)

    return select_epsilon_greedy_action
120+
77121
@abstractmethod
78122
def select_action(self, p: Dict[ActionId, Probability], actions: Optional[Dict[ActionId, Model]]) -> ActionId:
79123
"""
@@ -91,10 +135,14 @@ class BaseMab(PyBanditsBaseModel, ABC):
91135
The list of possible actions, and their associated Model.
92136
strategy: Strategy
93137
The strategy used to select actions.
138+
epsilon: Optional[Float01], defaults to None
139+
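The probability of selecting the default action (or a random action when no default action is set).
default_action: Optional[ActionId]
The action selected with probability epsilon. None for random selection.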
94140
"""
95141

96142
actions: Dict[ActionId, Model]
97143
strategy: Strategy
144+
epsilon: Optional[Float01] = None
145+
default_action: Optional[ActionId]
98146

99147
@validator("actions", pre=True)
100148
@classmethod

pybandits/cmab.py

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,13 @@ class CmabBernoulli(BaseCmabBernoulli):
212212
predict_with_proba: bool = False
213213
predict_actions_randomly: bool = False
214214

215-
def __init__(self, actions: Dict[ActionId, BaseBayesianLogisticRegression]):
216-
super().__init__(actions=actions, strategy=ClassicBandit())
215+
def __init__(
    self,
    actions: Dict[ActionId, BaseBayesianLogisticRegression],
    epsilon: Optional[Float01] = None,
    default_action: Optional[ActionId] = None,
):
    """
    Initialize the bandit with a ClassicBandit strategy.

    Parameters
    ----------
    actions: Dict[ActionId, BaseBayesianLogisticRegression]
        The possible actions and their associated models.
    epsilon: Optional[Float01]
        Forwarded to the parent initializer as ``epsilon``.
    default_action: Optional[ActionId]
        Forwarded to the parent initializer as ``default_action``.
    """
    strategy = ClassicBandit()
    super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action)
217222

218223
@classmethod
219224
def from_state(cls, state: dict) -> "CmabBernoulli":
@@ -249,9 +254,15 @@ class CmabBernoulliBAI(BaseCmabBernoulli):
249254
predict_with_proba: bool = False
250255
predict_actions_randomly: bool = False
251256

252-
def __init__(self, actions: Dict[ActionId, BayesianLogisticRegression], exploit_p: Optional[Float01] = None):
257+
def __init__(
    self,
    actions: Dict[ActionId, BayesianLogisticRegression],
    epsilon: Optional[Float01] = None,
    default_action: Optional[ActionId] = None,
    exploit_p: Optional[Float01] = None,
):
    """
    Initialize the bandit with a BestActionIdentification strategy.

    Parameters
    ----------
    actions: Dict[ActionId, BayesianLogisticRegression]
        The possible actions and their associated models.
    epsilon: Optional[Float01]
        Forwarded to the parent initializer as ``epsilon``.
    default_action: Optional[ActionId]
        Forwarded to the parent initializer as ``default_action``.
    exploit_p: Optional[Float01]
        Passed to BestActionIdentification when given; otherwise the strategy is built
        with its own defaults.
    """
    # NOTE(review): exploit_p is the fourth positional parameter here; callers that pass it
    # positionally right after ``actions`` would bind ``epsilon`` instead -- prefer keywords.
    if exploit_p is None:
        strategy = BestActionIdentification()
    else:
        strategy = BestActionIdentification(exploit_p=exploit_p)
    super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action)
255266

256267
@classmethod
257268
def from_state(cls, state: dict) -> "CmabBernoulliBAI":
@@ -296,9 +307,15 @@ class CmabBernoulliCC(BaseCmabBernoulli):
296307
predict_with_proba: bool = True
297308
predict_actions_randomly: bool = False
298309

299-
def __init__(self, actions: Dict[ActionId, BayesianLogisticRegressionCC], subsidy_factor: Optional[Float01] = None):
310+
def __init__(
    self,
    actions: Dict[ActionId, BayesianLogisticRegressionCC],
    epsilon: Optional[Float01] = None,
    default_action: Optional[ActionId] = None,
    subsidy_factor: Optional[Float01] = None,
):
    """
    Initialize the bandit with a CostControlBandit strategy.

    Parameters
    ----------
    actions: Dict[ActionId, BayesianLogisticRegressionCC]
        The possible actions and their associated models.
    epsilon: Optional[Float01]
        Forwarded to the parent initializer as ``epsilon``.
    default_action: Optional[ActionId]
        Forwarded to the parent initializer as ``default_action``.
    subsidy_factor: Optional[Float01]
        Passed to CostControlBandit when given; otherwise the strategy is built with its
        own defaults.
    """
    # NOTE(review): subsidy_factor is the fourth positional parameter here; callers that pass
    # it positionally right after ``actions`` would bind ``epsilon`` instead -- prefer keywords.
    if subsidy_factor is None:
        strategy = CostControlBandit()
    else:
        strategy = CostControlBandit(subsidy_factor=subsidy_factor)
    super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action)
302319

303320
@classmethod
304321
def from_state(cls, state: dict) -> "CmabBernoulliCC":
@@ -310,7 +327,12 @@ def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[Bina
310327

311328

312329
@validate_arguments
313-
def create_cmab_bernoulli_cold_start(action_ids: Set[ActionId], n_features: PositiveInt) -> CmabBernoulli:
330+
def create_cmab_bernoulli_cold_start(
331+
action_ids: Set[ActionId],
332+
n_features: PositiveInt,
333+
epsilon: Optional[Float01] = None,
334+
default_action: Optional[ActionId] = None,
335+
) -> CmabBernoulli:
314336
"""
315337
Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, with default
316338
parameters. Until the very first update the model will predict actions randomly, where each action has equal
@@ -323,6 +345,10 @@ def create_cmab_bernoulli_cold_start(action_ids: Set[ActionId], n_features: Posi
323345
n_features: PositiveInt
324346
The number of features expected in the context matrix. This is also the number of betas of the
325347
Bayesian Logistic Regression model.
348+
epsilon: Optional[Float01]
349+
epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
350+
default_action: Optional[ActionId]
351+
Default action to select if the epsilon-greedy approach is used. None for random selection.
326352
Returns
327353
-------
328354
cmab: CmabBernoulli
@@ -331,14 +357,18 @@ def create_cmab_bernoulli_cold_start(action_ids: Set[ActionId], n_features: Posi
331357
actions = {}
332358
for a in set(action_ids):
333359
actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
334-
mab = CmabBernoulli(actions=actions)
360+
mab = CmabBernoulli(actions=actions, epsilon=epsilon, default_action=default_action)
335361
mab.predict_actions_randomly = True
336362
return mab
337363

338364

339365
@validate_arguments
340366
def create_cmab_bernoulli_bai_cold_start(
341-
action_ids: Set[ActionId], n_features: PositiveInt, exploit_p: Optional[Float01] = None
367+
action_ids: Set[ActionId],
368+
n_features: PositiveInt,
369+
exploit_p: Optional[Float01] = None,
370+
epsilon: Optional[Float01] = None,
371+
default_action: Optional[ActionId] = None,
342372
) -> CmabBernoulliBAI:
343373
"""
344374
Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action
@@ -361,6 +391,10 @@ def create_cmab_bernoulli_bai_cold_start(
361391
(it behaves as a Greedy strategy).
362392
If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive
363393
reward.
394+
epsilon: Optional[Float01]
395+
epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
396+
default_action: Optional[ActionId]
397+
Default action to select if the epsilon-greedy approach is used. None for random selection.
364398
365399
Returns
366400
-------
@@ -370,7 +404,7 @@ def create_cmab_bernoulli_bai_cold_start(
370404
actions = {}
371405
for a in set(action_ids):
372406
actions[a] = create_bayesian_logistic_regression_cold_start(n_betas=n_features)
373-
mab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p)
407+
mab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p, epsilon=epsilon, default_action=default_action)
374408
mab.predict_actions_randomly = True
375409
return mab
376410

@@ -380,6 +414,8 @@ def create_cmab_bernoulli_cc_cold_start(
380414
action_ids_cost: Dict[ActionId, NonNegativeFloat],
381415
n_features: PositiveInt,
382416
subsidy_factor: Optional[Float01] = None,
417+
epsilon: Optional[Float01] = None,
418+
default_action: Optional[ActionId] = None,
383419
) -> CmabBernoulliCC:
384420
"""
385421
Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control
@@ -408,6 +444,10 @@ def create_cmab_bernoulli_cc_cold_start(
408444
If subsidy_factor is 1, the bandits always selects the action with the minimum cost.
409445
If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive
410446
reward (it behaves as a classic Bernoulli bandit).
447+
epsilon: Optional[Float01]
448+
epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
449+
default_action: Optional[ActionId]
450+
Default action to select if the epsilon-greedy approach is used. None for random selection.
411451
412452
Returns
413453
-------
@@ -417,6 +457,8 @@ def create_cmab_bernoulli_cc_cold_start(
417457
actions = {}
418458
for a, cost in action_ids_cost.items():
419459
actions[a] = create_bayesian_logistic_regression_cc_cold_start(n_betas=n_features, cost=cost)
420-
mab = CmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor)
460+
mab = CmabBernoulliCC(
461+
actions=actions, subsidy_factor=subsidy_factor, epsilon=epsilon, default_action=default_action
462+
)
421463
mab.predict_actions_randomly = True
422464
return mab

0 commit comments

Comments
 (0)