@@ -212,8 +212,13 @@ class CmabBernoulli(BaseCmabBernoulli):
212
212
predict_with_proba : bool = False
213
213
predict_actions_randomly : bool = False
214
214
215
def __init__(
    self,
    actions: Dict[ActionId, BaseBayesianLogisticRegression],
    epsilon: Optional[Float01] = None,
    default_action: Optional[ActionId] = None,
):
    """
    Build a contextual Bernoulli bandit driven by the ClassicBandit strategy.

    Parameters
    ----------
    actions: Dict[ActionId, BaseBayesianLogisticRegression]
        Model associated with each action id.
    epsilon: Optional[Float01]
        epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
    default_action: Optional[ActionId]
        Default action to select if the epsilon-greedy approach is used. None for random selection.
    """
    classic_strategy = ClassicBandit()
    super().__init__(
        actions=actions,
        strategy=classic_strategy,
        epsilon=epsilon,
        default_action=default_action,
    )
217
222
218
223
@classmethod
219
224
def from_state (cls , state : dict ) -> "CmabBernoulli" :
@@ -249,9 +254,15 @@ class CmabBernoulliBAI(BaseCmabBernoulli):
249
254
predict_with_proba : bool = False
250
255
predict_actions_randomly : bool = False
251
256
252
def __init__(
    self,
    actions: Dict[ActionId, BayesianLogisticRegression],
    exploit_p: Optional[Float01] = None,
    epsilon: Optional[Float01] = None,
    default_action: Optional[ActionId] = None,
):
    """
    Build a contextual Bernoulli bandit driven by the BestActionIdentification strategy.

    Parameters
    ----------
    actions: Dict[ActionId, BayesianLogisticRegression]
        Model associated with each action id.
    exploit_p: Optional[Float01]
        Forwarded to BestActionIdentification when not None; when None the strategy is built with its own
        default.
    epsilon: Optional[Float01]
        epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
    default_action: Optional[ActionId]
        Default action to select if the epsilon-greedy approach is used. None for random selection.

    Notes
    -----
    exploit_p deliberately stays in the second positional slot, where it was before epsilon and
    default_action were introduced. Both exploit_p and epsilon are Float01, so placing the new parameters
    ahead of it would make an existing positional call silently set epsilon instead of exploit_p.
    Prefer keyword arguments for all optional parameters.
    """
    # Only pass exploit_p through when the caller supplied one, so the strategy keeps its own default.
    if exploit_p is None:
        strategy = BestActionIdentification()
    else:
        strategy = BestActionIdentification(exploit_p=exploit_p)
    super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action)
255
266
256
267
@classmethod
257
268
def from_state (cls , state : dict ) -> "CmabBernoulliBAI" :
@@ -296,9 +307,15 @@ class CmabBernoulliCC(BaseCmabBernoulli):
296
307
predict_with_proba : bool = True
297
308
predict_actions_randomly : bool = False
298
309
299
def __init__(
    self,
    actions: Dict[ActionId, BayesianLogisticRegressionCC],
    subsidy_factor: Optional[Float01] = None,
    epsilon: Optional[Float01] = None,
    default_action: Optional[ActionId] = None,
):
    """
    Build a contextual Bernoulli bandit driven by the CostControlBandit strategy.

    Parameters
    ----------
    actions: Dict[ActionId, BayesianLogisticRegressionCC]
        Model associated with each action id.
    subsidy_factor: Optional[Float01]
        Forwarded to CostControlBandit when not None; when None the strategy is built with its own default.
    epsilon: Optional[Float01]
        epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
    default_action: Optional[ActionId]
        Default action to select if the epsilon-greedy approach is used. None for random selection.

    Notes
    -----
    subsidy_factor deliberately stays in the second positional slot, where it was before epsilon and
    default_action were introduced. Both subsidy_factor and epsilon are Float01, so placing the new
    parameters ahead of it would make an existing positional call silently set epsilon instead of
    subsidy_factor. Prefer keyword arguments for all optional parameters.
    """
    # Only pass subsidy_factor through when the caller supplied one, so the strategy keeps its own default.
    if subsidy_factor is None:
        strategy = CostControlBandit()
    else:
        strategy = CostControlBandit(subsidy_factor=subsidy_factor)
    super().__init__(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action)
302
319
303
320
@classmethod
304
321
def from_state (cls , state : dict ) -> "CmabBernoulliCC" :
@@ -310,7 +327,12 @@ def update(self, context: ArrayLike, actions: List[ActionId], rewards: List[Bina
310
327
311
328
312
329
@validate_arguments
313
- def create_cmab_bernoulli_cold_start (action_ids : Set [ActionId ], n_features : PositiveInt ) -> CmabBernoulli :
330
+ def create_cmab_bernoulli_cold_start (
331
+ action_ids : Set [ActionId ],
332
+ n_features : PositiveInt ,
333
+ epsilon : Optional [Float01 ] = None ,
334
+ default_action : Optional [ActionId ] = None ,
335
+ ) -> CmabBernoulli :
314
336
"""
315
337
Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, with default
316
338
parameters. Until the very first update the model will predict actions randomly, where each action has equal
@@ -323,6 +345,10 @@ def create_cmab_bernoulli_cold_start(action_ids: Set[ActionId], n_features: Posi
323
345
n_features: PositiveInt
324
346
The number of features expected in the context matrix. This is also the number of betas of the
325
347
Bayesian Logistic Regression model.
348
+ epsilon: Optional[Float01]
349
+ epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
350
+ default_action: Optional[ActionId]
351
+ Default action to select if the epsilon-greedy approach is used. None for random selection.
326
352
Returns
327
353
-------
328
354
cmab: CmabBernoulli
@@ -331,14 +357,18 @@ def create_cmab_bernoulli_cold_start(action_ids: Set[ActionId], n_features: Posi
331
357
actions = {}
332
358
for a in set (action_ids ):
333
359
actions [a ] = create_bayesian_logistic_regression_cold_start (n_betas = n_features )
334
- mab = CmabBernoulli (actions = actions )
360
+ mab = CmabBernoulli (actions = actions , epsilon = epsilon , default_action = default_action )
335
361
mab .predict_actions_randomly = True
336
362
return mab
337
363
338
364
339
365
@validate_arguments
340
366
def create_cmab_bernoulli_bai_cold_start (
341
- action_ids : Set [ActionId ], n_features : PositiveInt , exploit_p : Optional [Float01 ] = None
367
+ action_ids : Set [ActionId ],
368
+ n_features : PositiveInt ,
369
+ exploit_p : Optional [Float01 ] = None ,
370
+ epsilon : Optional [Float01 ] = None ,
371
+ default_action : Optional [ActionId ] = None ,
342
372
) -> CmabBernoulliBAI :
343
373
"""
344
374
Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Best Action
@@ -361,6 +391,10 @@ def create_cmab_bernoulli_bai_cold_start(
361
391
(it behaves as a Greedy strategy).
362
392
If exploit_p is 0, the bandits always select the action with 2nd highest probability of getting a positive
363
393
reward.
394
+ epsilon: Optional[Float01]
395
+ epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
396
+ default_action: Optional[ActionId]
397
+ Default action to select if the epsilon-greedy approach is used. None for random selection.
364
398
365
399
Returns
366
400
-------
@@ -370,7 +404,7 @@ def create_cmab_bernoulli_bai_cold_start(
370
404
actions = {}
371
405
for a in set (action_ids ):
372
406
actions [a ] = create_bayesian_logistic_regression_cold_start (n_betas = n_features )
373
- mab = CmabBernoulliBAI (actions = actions , exploit_p = exploit_p )
407
+ mab = CmabBernoulliBAI (actions = actions , exploit_p = exploit_p , epsilon = epsilon , default_action = default_action )
374
408
mab .predict_actions_randomly = True
375
409
return mab
376
410
@@ -380,6 +414,8 @@ def create_cmab_bernoulli_cc_cold_start(
380
414
action_ids_cost : Dict [ActionId , NonNegativeFloat ],
381
415
n_features : PositiveInt ,
382
416
subsidy_factor : Optional [Float01 ] = None ,
417
+ epsilon : Optional [Float01 ] = None ,
418
+ default_action : Optional [ActionId ] = None ,
383
419
) -> CmabBernoulliCC :
384
420
"""
385
421
Utility function to create a Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling, and Cost Control
@@ -408,6 +444,10 @@ def create_cmab_bernoulli_cc_cold_start(
408
444
If subsidy_factor is 1, the bandits always selects the action with the minimum cost.
409
445
If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive
410
446
reward (it behaves as a classic Bernoulli bandit).
447
+ epsilon: Optional[Float01]
448
+ epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used.
449
+ default_action: Optional[ActionId]
450
+ Default action to select if the epsilon-greedy approach is used. None for random selection.
411
451
412
452
Returns
413
453
-------
@@ -417,6 +457,8 @@ def create_cmab_bernoulli_cc_cold_start(
417
457
actions = {}
418
458
for a , cost in action_ids_cost .items ():
419
459
actions [a ] = create_bayesian_logistic_regression_cc_cold_start (n_betas = n_features , cost = cost )
420
- mab = CmabBernoulliCC (actions = actions , subsidy_factor = subsidy_factor )
460
+ mab = CmabBernoulliCC (
461
+ actions = actions , subsidy_factor = subsidy_factor , epsilon = epsilon , default_action = default_action
462
+ )
421
463
mab .predict_actions_randomly = True
422
464
return mab
0 commit comments