Skip to content

Commit

Permalink
Optional sigmoid transformation and bugfix (#44)
Browse files Browse the repository at this point in the history
* initial commit

Signed-off-by: bkleyn <bernard.kleynhans@fmr.com>

* change parameter name

---------

Signed-off-by: bkleyn <bernard.kleynhans@fmr.com>
Co-authored-by: wddcheng <du.cheng@fmr.com>
  • Loading branch information
bkleyn and wddcheng authored Mar 7, 2024
1 parent 9d9b45a commit cd38ecd
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 46 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@
CHANGELOG
=========

-------------------------------------------------------------------------------
February, 22, 2024 1.3.0
-------------------------------------------------------------------------------

Major:
- Added optional `apply_sigmoid` argument to recommend() method, to
control whether sigmoid transformation is applied to scores or not.
Minor:
- Fixed a bug when recommending for a single context.


-------------------------------------------------------------------------------
February, 05, 2024 1.2.1
-------------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion mab2rec/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@

__author__ = "FMR LLC"
__email__ = "opensource@fmr.com"
__version__ = "1.2.1"
__version__ = "1.3.0"
__copyright__ = "Copyright (C), FMR LLC"
14 changes: 9 additions & 5 deletions mab2rec/rec.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def predict_expectations(self, contexts: Union[None, List[List[Num]], np.ndarray
return self.mab.predict_expectations(contexts)

def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None,
excluded_arms: List[List[Arm]] = None, return_scores: bool = False) \
excluded_arms: List[List[Arm]] = None, return_scores: bool = False, apply_sigmoid: bool = True) \
-> Union[Union[List[Arm], Tuple[List[Arm], List[Num]],
Union[List[List[Arm]], Tuple[List[List[Arm]], List[List[Num]]]]]]:
"""Generate _top-k_ recommendations based on the expected reward.
Expand All @@ -306,6 +306,8 @@ def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series
List of list of arms to exclude from recommended arms.
return_scores : bool, default=False
Return score for each recommended item.
apply_sigmoid : bool, default=True
Whether to apply sigmoid transformation to scores before ranking.
Returns
-------
Expand All @@ -315,15 +317,17 @@ def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series
self._validate_get_rec(contexts, excluded_arms)

# Get predicted expectations
if contexts is None:
num_contexts = 1
num_contexts = len(contexts) if contexts is not None else 1
if num_contexts == 1:
expectations = [self.mab.predict_expectations(contexts)]
else:
num_contexts = len(contexts)
expectations = self.mab.predict_expectations(contexts)

# Optionally take sigmoid of expectations so that values are between 0 and 1
expectations = expit(pd.DataFrame(expectations)[self.mab.arms].values)
if apply_sigmoid:
expectations = expit(pd.DataFrame(expectations)[self.mab.arms].values)
else:
expectations = pd.DataFrame(expectations)[self.mab.arms].values

# Create an exclusion mask, where exclusion_mask[context_ind][arm_ind] denotes if the arm with the
# index arm_ind was excluded for context with the index context_ind.
Expand Down
3 changes: 2 additions & 1 deletion tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def predict(arms: List[Arm],
NeighborhoodPolicy.TreeBandit] = None,
context_history: Union[None, List[Num], List[List[Num]], np.ndarray, pd.DataFrame, pd.Series] = None,
contexts: Union[None, List[Num], List[List[Num]], np.ndarray, pd.DataFrame, pd.Series] = None,
apply_sigmoid: bool = True,
excluded_arms: List[List[Arm]] = None,
warm_start: bool = False,
arm_to_features: Dict[Arm, List[Num]] = None,
Expand All @@ -103,7 +104,7 @@ def predict(arms: List[Arm],
rec.warm_start(arm_to_features, distance_quantile=0.5)

# Run
recommendations = rec.recommend(contexts, excluded_arms, return_scores=True)
recommendations = rec.recommend(contexts, excluded_arms, return_scores=True, apply_sigmoid=apply_sigmoid)

return recommendations, rec

Expand Down
139 changes: 100 additions & 39 deletions tests/test_rec.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def test_parametric_learning_policies(self):
learning_policy=lp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)

Expand All @@ -49,9 +49,9 @@ def test_neighborhood_policies(self):
neighborhood_policy=cp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)
for lp in self.para_lps:
Expand All @@ -64,9 +64,9 @@ def test_neighborhood_policies(self):
neighborhood_policy=cp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)

Expand All @@ -89,9 +89,9 @@ def test_parametric_learning_policies_predict(self):
learning_policy=lp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)
rec.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [1, 2, 1, 1, 3]])
Expand All @@ -109,9 +109,9 @@ def test_neighborhood_policies_predict(self):
neighborhood_policy=cp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)
rec.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [1, 2, 1, 1, 3]])
Expand All @@ -126,14 +126,75 @@ def test_neighborhood_policies_predict(self):
neighborhood_policy=cp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)
rec.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [1, 2, 1, 1, 3]])
rec.predict_expectations([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [1, 2, 1, 1, 3]])

def test_learning_policies_no_sigmoid(self):
    """Run a recommendation for each policy in ``self.lps`` with ``apply_sigmoid=False``.

    Smoke test only: nothing is asserted on the returned recommendations,
    the call merely has to complete for every learning policy.
    """
    for policy in self.lps:
        # Arguments are rebuilt on every iteration so each run gets fresh lists.
        call_args = {
            "arms": [1, 2, 3],
            "decisions": [1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            "rewards": [0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            "learning_policy": policy,
            "contexts": [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            "apply_sigmoid": False,
            "top_k": 2,
            "seed": 123456,
        }
        self.predict(**call_args)

def test_parametric_learning_policies_no_sigmoid(self):
    """Run a recommendation for each policy in ``self.para_lps`` with ``apply_sigmoid=False``.

    Smoke test only: a context history is supplied alongside the decisions
    and rewards, and nothing is asserted on the returned recommendations.
    """
    for policy in self.para_lps:
        # One history row per decision/reward pair; rebuilt on every iteration.
        history = [
            [0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
            [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
            [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
            [0, 2, 1, 0, 0],
        ]
        call_args = {
            "arms": [1, 2, 3],
            "decisions": [1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
            "rewards": [0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
            "learning_policy": policy,
            "contexts": [[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
            "context_history": history,
            "apply_sigmoid": False,
            "top_k": 2,
            "seed": 123456,
        }
        self.predict(**call_args)

def test_neighborhood_policies_recommend_no_sigmoid(self):
    """Run a recommendation for every compatible policy pair with ``apply_sigmoid=False``.

    For each neighborhood policy in ``self.nps``, the policies in ``self.lps``
    are tried first and those in ``self.para_lps`` second. Pairs rejected by
    ``self.is_compatible`` are skipped. Smoke test only: nothing is asserted
    on the returned recommendations.
    """
    for neighborhood in self.nps:
        # Same visiting order as two back-to-back loops: lps first, then para_lps.
        for policy_group in (self.lps, self.para_lps):
            for policy in policy_group:
                if self.is_compatible(policy, neighborhood):
                    history = [
                        [0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
                        [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
                        [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
                        [0, 2, 1, 0, 0],
                    ]
                    self.predict(
                        arms=[1, 2, 3],
                        decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                        rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
                        learning_policy=policy,
                        neighborhood_policy=neighborhood,
                        contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                        context_history=history,
                        apply_sigmoid=False,
                        top_k=2,
                        seed=123456,
                    )

def test_learning_policies_partial_fit(self):
for lp in self.lps:
_, rec = self.predict(arms=[1, 2, 3],
Expand All @@ -152,9 +213,9 @@ def test_parametric_learning_policies_partial_fit(self):
learning_policy=lp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)
rec.partial_fit(decisions=[1, 1, 2], rewards=[0, 1, 0],
Expand All @@ -172,9 +233,9 @@ def test_neighborhood_policies_partial_fit(self):
neighborhood_policy=cp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)
rec.partial_fit(decisions=[1, 1, 2], rewards=[0, 1, 0],
Expand All @@ -189,9 +250,9 @@ def test_neighborhood_policies_partial_fit(self):
neighborhood_policy=cp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)
rec.partial_fit(decisions=[1, 1, 2], rewards=[0, 1, 0],
Expand All @@ -217,9 +278,9 @@ def test_parametric_learning_policies_warm_start(self):
learning_policy=lp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456,
warm_start=True,
Expand All @@ -237,9 +298,9 @@ def test_neighborhood_policies_warm_start(self):
neighborhood_policy=cp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456,
warm_start=True,
Expand All @@ -254,9 +315,9 @@ def test_neighborhood_policies_warm_start(self):
neighborhood_policy=cp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456,
warm_start=True,
Expand Down Expand Up @@ -364,9 +425,9 @@ def test_parametric_learning_policies_set_arms(self):
learning_policy=lp,
contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
[0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
[0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
[0, 2, 1, 0, 0]],
top_k=2,
seed=123456)
rec.set_arms([2, 5])
Expand Down

0 comments on commit cd38ecd

Please sign in to comment.