enhancement/blender logging (#158)

* refactor blending, log current score * add eps to blending coordinate descent
sb-ai-lab · Jul 31, 2024 · 8eb1a18 · 8eb1a18
1 parent 1db3fb8
commit 8eb1a18
Showing 1 changed file with 38 additions and 29 deletions.
diff --git a/lightautoml/automl/blend.py b/lightautoml/automl/blend.py
@@ -327,8 +327,18 @@ def _get_weighted_pred(self, splitted_preds: Sequence[NumpyDataset], wts: Option
 
         return outp
 
-    def _get_candidate(self, wts: np.ndarray, idx: int, value: float):
+    def _get_candidate(self, wts: np.ndarray, idx: int, value: float) -> np.ndarray:
+        """Replaces the value at the idx position in the input array with a new input value, \
+        modifies the values in all other positions to keep the total sum of the array equal to 1.
 
+        Args:
+            wts (np.ndarray): input array
+            idx (int): index
+            value (float): new value
+
+        Returns:
+            np.ndarray: new array with sum equal to 1.
+        """
         candidate = wts.copy()
         sl = np.arange(wts.shape[0]) != idx
         s = candidate[sl].sum()
@@ -357,54 +367,53 @@ def scorer(x):
 
         return scorer
 
-    def _optimize(self, splitted_preds: Sequence[NumpyDataset]) -> np.ndarray:
+    def _optimize(self, splitted_preds: Sequence[NumpyDataset], eps: float = 1e-7) -> np.ndarray:
+        """Search for blending weights by coordinate descent, tuning one weight at a time.
 
-        length = len(splitted_preds)
-        candidate = np.ones(length, dtype=np.float32) / length
-        pre_candidate = candidate
-        best_pred = self._get_weighted_pred(splitted_preds, candidate)
+        Args:
+            splitted_preds (Sequence[NumpyDataset]): predictions to be combined with the weights.
+            eps (float): margin by which a trial score must exceed the best score to be accepted.
 
+        Returns:
+            np.ndarray: the best weights found (equal weights if no step improved the score).
+        """
+        # start from equal weights; `candidate` is the accepted point every 1-D search starts from
+        candidate = np.ones(len(splitted_preds), dtype=np.float32) / len(splitted_preds)
+
+        best_pred = self._get_weighted_pred(splitted_preds, candidate)
         best_score = self.score(best_pred)
-        logger.info("Blending: optimization starts with equal weights and score \x1b[1m{0}\x1b[0m".format(best_score))
-        score = best_score
-        iter_best_score = None
-        iter_best_weights = None
-        for _ in range(self.max_iters):
+        best_weights = candidate
+        logger.info(f"Blending: optimization starts with equal weights. Score = \x1b[1m{best_score}\x1b[0m")
+
+        for iteration_num in range(self.max_iters):
             flg_no_upd = True
-            for i in range(len(splitted_preds)):
-                if candidate[i] == 1:
+            for weights_idx in range(len(splitted_preds)):
+                if candidate[weights_idx] == 1:
                     continue
 
-                obj = self._get_scorer(splitted_preds, i, candidate)
                 opt_res = minimize_scalar(
-                    obj,
+                    self._get_scorer(splitted_preds, weights_idx, candidate),
                     method="Bounded",
                     bounds=(0, 1),
                     options={"disp": False, "maxiter": self.max_inner_iters},
                 )
-                w = opt_res.x
+                new_weight = opt_res.x
                 score = -opt_res.fun
-                pre_candidate = self._get_candidate(candidate, i, w)
-                if i == 0 or iter_best_score < score:
-                    iter_best_score = score
-                    iter_best_weights = pre_candidate
-                if score > best_score:
+                new_candidate = self._get_candidate(candidate, weights_idx, new_weight)
+                # accept the step only on a real gain, so `candidate` never moves to a worse point
+                if score - eps > best_score:
                     flg_no_upd = False
                     best_score = score
-                    # if w < self.max_nonzero_coef:
-                    #     w = 0
-
-                    candidate = pre_candidate
+                    candidate = best_weights = new_candidate
 
             logger.info(
-                "Blending: iteration \x1b[1m{0}\x1b[0m: score = \x1b[1m{1}\x1b[0m, weights = \x1b[1m{2}\x1b[0m".format(
-                    _, iter_best_score, iter_best_weights
-                )
+                f"Blending: iteration \x1b[1m{iteration_num}\x1b[0m: score = \x1b[1m{best_score}\x1b[0m, weights = \x1b[1m{candidate}\x1b[0m"
             )
 
             if flg_no_upd:
-                logger.info("Blending: no score update. Terminated\n")
+                logger.info("Blending: no improvements for score. Terminated.\n")
                 break
+        logger.info(f"Blending: best score = \x1b[1m{best_score}\x1b[0m, best weights = \x1b[1m{best_weights}\x1b[0m")
 
         return candidate