Optional sigmoid transformation and bugfix (#44)

* initial commit Signed-off-by: bkleyn <bernard.kleynhans@fmr.com> * change parameter name --------- Signed-off-by: bkleyn <bernard.kleynhans@fmr.com> Co-authored-by: wddcheng <du.cheng@fmr.com>
fidelity · Mar 7, 2024 · cd38ecd · cd38ecd
1 parent 9d9b45a
commit cd38ecd
Show file tree

Hide file tree

Showing 5 changed files with 123 additions and 46 deletions.
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
@@ -2,6 +2,17 @@
 CHANGELOG
 =========
 
+-------------------------------------------------------------------------------
+February, 22, 2024 1.3.0
+-------------------------------------------------------------------------------
+
+Major:
+- Added optional `apply_sigmoid` argument (default True) to recommend() method,
+  to control whether the sigmoid transformation is applied to scores.
+Minor:
+- Fixed bug in recommend() when `contexts` contains only a single context.
+
+
 -------------------------------------------------------------------------------
 February, 05, 2024 1.2.1
 -------------------------------------------------------------------------------

diff --git a/mab2rec/_version.py b/mab2rec/_version.py
@@ -4,5 +4,5 @@
 
 __author__ = "FMR LLC"
 __email__ = "opensource@fmr.com"
-__version__ = "1.2.1"
+__version__ = "1.3.0"
 __copyright__ = "Copyright (C), FMR LLC"
diff --git a/mab2rec/rec.py b/mab2rec/rec.py
@@ -288,7 +288,7 @@ def predict_expectations(self, contexts: Union[None, List[List[Num]], np.ndarray
         return self.mab.predict_expectations(contexts)
 
     def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series, pd.DataFrame] = None,
-                  excluded_arms: List[List[Arm]] = None, return_scores: bool = False) \
+                  excluded_arms: List[List[Arm]] = None, return_scores: bool = False, apply_sigmoid: bool = True) \
             -> Union[Union[List[Arm], Tuple[List[Arm], List[Num]],
                      Union[List[List[Arm]], Tuple[List[List[Arm]], List[List[Num]]]]]]:
         """Generate _top-k_ recommendations based on the expected reward.
@@ -306,6 +306,8 @@ def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series
             List of list of arms to exclude from recommended arms.
         return_scores : bool, default=False
             Return score for each recommended item.
+        apply_sigmoid : bool, default=True
+            Whether to apply sigmoid transformation to scores before ranking.
 
         Returns
         -------
@@ -315,15 +317,17 @@ def recommend(self, contexts: Union[None, List[List[Num]], np.ndarray, pd.Series
         self._validate_get_rec(contexts, excluded_arms)
 
         # Get predicted expectations
-        if contexts is None:
-            num_contexts = 1
+        num_contexts = len(contexts) if contexts is not None else 1
+        if num_contexts == 1:
             expectations = [self.mab.predict_expectations(contexts)]
         else:
-            num_contexts = len(contexts)
             expectations = self.mab.predict_expectations(contexts)
 
         # Take sigmoid of expectations so that values are between 0 and 1
-        expectations = expit(pd.DataFrame(expectations)[self.mab.arms].values)
+        if apply_sigmoid:
+            expectations = expit(pd.DataFrame(expectations)[self.mab.arms].values)
+        else:
+            expectations = pd.DataFrame(expectations)[self.mab.arms].values
 
         # Create an exclusion mask, where exclusion_mask[context_ind][arm_ind] denotes if the arm with the
         # index arm_ind was excluded for context with the index context_ind.

diff --git a/tests/test_base.py b/tests/test_base.py
@@ -81,6 +81,7 @@ def predict(arms: List[Arm],
                                            NeighborhoodPolicy.TreeBandit] = None,
                 context_history: Union[None, List[Num], List[List[Num]], np.ndarray, pd.DataFrame, pd.Series] = None,
                 contexts: Union[None, List[Num], List[List[Num]], np.ndarray, pd.DataFrame, pd.Series] = None,
+                apply_sigmoid: bool = True,
                 excluded_arms: List[List[Arm]] = None,
                 warm_start: bool = False,
                 arm_to_features: Dict[Arm, List[Num]] = None,
@@ -103,7 +104,7 @@ def predict(arms: List[Arm],
             rec.warm_start(arm_to_features, distance_quantile=0.5)
 
         # Run
-        recommendations = rec.recommend(contexts, excluded_arms, return_scores=True)
+        recommendations = rec.recommend(contexts, excluded_arms, return_scores=True, apply_sigmoid=apply_sigmoid)
 
         return recommendations, rec
 

diff --git a/tests/test_rec.py b/tests/test_rec.py
@@ -31,9 +31,9 @@ def test_parametric_learning_policies(self):
                          learning_policy=lp,
                          contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                          context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                       [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                       [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                       [0, 2, 1, 0, 0]],
+                                          [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                          [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                          [0, 2, 1, 0, 0]],
                          top_k=2,
                          seed=123456)
 
@@ -49,9 +49,9 @@ def test_neighborhood_policies(self):
                              neighborhood_policy=cp,
                              contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                              context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                           [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                           [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                           [0, 2, 1, 0, 0]],
+                                              [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                              [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                              [0, 2, 1, 0, 0]],
                              top_k=2,
                              seed=123456)
             for lp in self.para_lps:
@@ -64,9 +64,9 @@ def test_neighborhood_policies(self):
                              neighborhood_policy=cp,
                              contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                              context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                           [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                           [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                           [0, 2, 1, 0, 0]],
+                                              [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                              [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                              [0, 2, 1, 0, 0]],
                              top_k=2,
                              seed=123456)
 
@@ -89,9 +89,9 @@ def test_parametric_learning_policies_predict(self):
                                   learning_policy=lp,
                                   contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                   context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                                [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                                [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                                [0, 2, 1, 0, 0]],
+                                                   [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                                   [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                                   [0, 2, 1, 0, 0]],
                                   top_k=2,
                                   seed=123456)
             rec.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [1, 2, 1, 1, 3]])
@@ -109,9 +109,9 @@ def test_neighborhood_policies_predict(self):
                                       neighborhood_policy=cp,
                                       contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                       context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                                    [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                                    [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                                    [0, 2, 1, 0, 0]],
+                                                       [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                                       [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                                       [0, 2, 1, 0, 0]],
                                       top_k=2,
                                       seed=123456)
                 rec.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [1, 2, 1, 1, 3]])
@@ -126,14 +126,75 @@ def test_neighborhood_policies_predict(self):
                                       neighborhood_policy=cp,
                                       contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                       context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                                    [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                                    [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                                    [0, 2, 1, 0, 0]],
+                                                       [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                                       [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                                       [0, 2, 1, 0, 0]],
                                       top_k=2,
                                       seed=123456)
                 rec.predict([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [1, 2, 1, 1, 3]])
                 rec.predict_expectations([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [1, 2, 1, 1, 3]])
 
+    def test_learning_policies_no_sigmoid(self):
+        for lp in self.lps:
+            self.predict(arms=[1, 2, 3],
+                         decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
+                         rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
+                         learning_policy=lp,
+                         contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
+                         apply_sigmoid=False,
+                         top_k=2,
+                         seed=123456)
+
+    def test_parametric_learning_policies_no_sigmoid(self):
+        for lp in self.para_lps:
+            self.predict(arms=[1, 2, 3],
+                         decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
+                         rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
+                         learning_policy=lp,
+                         contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
+                         context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
+                                          [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                          [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                          [0, 2, 1, 0, 0]],
+                         apply_sigmoid=False,
+                         top_k=2,
+                         seed=123456)
+
+    def test_neighborhood_policies_recommend_no_sigmoid(self):
+        for cp in self.nps:
+            for lp in self.lps:
+                if not self.is_compatible(lp, cp):
+                    continue
+                self.predict(arms=[1, 2, 3],
+                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
+                             rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
+                             learning_policy=lp,
+                             neighborhood_policy=cp,
+                             contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
+                             context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
+                                              [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                              [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                              [0, 2, 1, 0, 0]],
+                             apply_sigmoid=False,
+                             top_k=2,
+                             seed=123456)
+            for lp in self.para_lps:
+                if not self.is_compatible(lp, cp):
+                    continue
+                self.predict(arms=[1, 2, 3],
+                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
+                             rewards=[0, 1, 1, 0, 0, 0, 0, 1, 1, 1],
+                             learning_policy=lp,
+                             neighborhood_policy=cp,
+                             contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
+                             context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
+                                              [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                              [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                              [0, 2, 1, 0, 0]],
+                             apply_sigmoid=False,
+                             top_k=2,
+                             seed=123456)
+
     def test_learning_policies_partial_fit(self):
         for lp in self.lps:
             _, rec = self.predict(arms=[1, 2, 3],
@@ -152,9 +213,9 @@ def test_parametric_learning_policies_partial_fit(self):
                                   learning_policy=lp,
                                   contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                   context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                                [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                                [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                                [0, 2, 1, 0, 0]],
+                                                   [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                                   [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                                   [0, 2, 1, 0, 0]],
                                   top_k=2,
                                   seed=123456)
             rec.partial_fit(decisions=[1, 1, 2], rewards=[0, 1, 0],
@@ -172,9 +233,9 @@ def test_neighborhood_policies_partial_fit(self):
                                       neighborhood_policy=cp,
                                       contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                       context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                                    [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                                    [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                                    [0, 2, 1, 0, 0]],
+                                                       [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                                       [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                                       [0, 2, 1, 0, 0]],
                                       top_k=2,
                                       seed=123456)
                 rec.partial_fit(decisions=[1, 1, 2], rewards=[0, 1, 0],
@@ -189,9 +250,9 @@ def test_neighborhood_policies_partial_fit(self):
                                       neighborhood_policy=cp,
                                       contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                       context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                                    [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                                    [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                                    [0, 2, 1, 0, 0]],
+                                                       [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                                       [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                                       [0, 2, 1, 0, 0]],
                                       top_k=2,
                                       seed=123456)
                 rec.partial_fit(decisions=[1, 1, 2], rewards=[0, 1, 0],
@@ -217,9 +278,9 @@ def test_parametric_learning_policies_warm_start(self):
                          learning_policy=lp,
                          contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                          context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                       [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                       [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                       [0, 2, 1, 0, 0]],
+                                          [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                          [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                          [0, 2, 1, 0, 0]],
                          top_k=2,
                          seed=123456,
                          warm_start=True,
@@ -237,9 +298,9 @@ def test_neighborhood_policies_warm_start(self):
                              neighborhood_policy=cp,
                              contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                              context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                           [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                           [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                           [0, 2, 1, 0, 0]],
+                                              [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                              [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                              [0, 2, 1, 0, 0]],
                              top_k=2,
                              seed=123456,
                              warm_start=True,
@@ -254,9 +315,9 @@ def test_neighborhood_policies_warm_start(self):
                              neighborhood_policy=cp,
                              contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                              context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                           [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                           [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                           [0, 2, 1, 0, 0]],
+                                              [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                              [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                              [0, 2, 1, 0, 0]],
                              top_k=2,
                              seed=123456,
                              warm_start=True,
@@ -364,9 +425,9 @@ def test_parametric_learning_policies_set_arms(self):
                                   learning_policy=lp,
                                   contexts=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]],
                                   context_history=[[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0],
-                                                [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
-                                                [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
-                                                [0, 2, 1, 0, 0]],
+                                                   [0, 2, 2, 3, 5], [1, 3, 1, 1, 1], [0, 0, 0, 0, 0],
+                                                   [0, 1, 4, 3, 5], [0, 1, 2, 4, 5], [1, 2, 1, 1, 3],
+                                                   [0, 2, 1, 0, 0]],
                                   top_k=2,
                                   seed=123456)
             rec.set_arms([2, 5])