Skip to content

Commit

Permalink
enhancement/blender logging (#158)
Browse files Browse the repository at this point in the history
* refactor blending, log current score
* add eps to blending coordinate descent
  • Loading branch information
dev-rinchin authored Jul 31, 2024
1 parent 1db3fb8 commit 8eb1a18
Showing 1 changed file with 38 additions and 29 deletions.
67 changes: 38 additions & 29 deletions lightautoml/automl/blend.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,18 @@ def _get_weighted_pred(self, splitted_preds: Sequence[NumpyDataset], wts: Option

return outp

def _get_candidate(self, wts: np.ndarray, idx: int, value: float):
def _get_candidate(self, wts: np.ndarray, idx: int, value: float) -> np.ndarray:
"""Replaces the value at the idx position in the input array with a new input value, \
modifies the values in all other positions to keep the total sum of the array equal to 1.
Args:
wts (np.ndarray): input array
idx (int): index
value (float): new value
Returns:
np.ndarray: new array with sum equal to 1.
"""
candidate = wts.copy()
sl = np.arange(wts.shape[0]) != idx
s = candidate[sl].sum()
Expand Down Expand Up @@ -357,54 +367,53 @@ def scorer(x):

return scorer

def _optimize(self, splitted_preds: Sequence[NumpyDataset]) -> np.ndarray:
def _optimize(self, splitted_preds: Sequence[NumpyDataset], eps: float = 1e-7) -> np.ndarray:
"""Perform coordinate descent.
length = len(splitted_preds)
candidate = np.ones(length, dtype=np.float32) / length
pre_candidate = candidate
best_pred = self._get_weighted_pred(splitted_preds, candidate)
Args:
splitted_preds (Sequence[NumpyDataset]): predictions for weighting to maximize the metric.
eps (float): minimum amount by which a new score must exceed the current best score to count as an improvement.
Returns:
np.ndarray: best weights.
"""
# the set of initial weights for blending
candidate = np.ones(len(splitted_preds), dtype=np.float32) / len(splitted_preds)

best_pred = self._get_weighted_pred(splitted_preds, candidate)
best_score = self.score(best_pred)
logger.info("Blending: optimization starts with equal weights and score \x1b[1m{0}\x1b[0m".format(best_score))
score = best_score
iter_best_score = None
iter_best_weights = None
for _ in range(self.max_iters):
best_weights = candidate
logger.info(f"Blending: optimization starts with equal weights. Score = \x1b[1m{best_score}\x1b[0m")

for iteration_num in range(self.max_iters):
flg_no_upd = True
for i in range(len(splitted_preds)):
if candidate[i] == 1:
for weights_idx in range(len(splitted_preds)):
if candidate[weights_idx] == 1:
continue

obj = self._get_scorer(splitted_preds, i, candidate)
opt_res = minimize_scalar(
obj,
self._get_scorer(splitted_preds, weights_idx, candidate),
method="Bounded",
bounds=(0, 1),
options={"disp": False, "maxiter": self.max_inner_iters},
)
w = opt_res.x
new_weight = opt_res.x
score = -opt_res.fun
pre_candidate = self._get_candidate(candidate, i, w)
if i == 0 or iter_best_score < score:
iter_best_score = score
iter_best_weights = pre_candidate
if score > best_score:
candidate = self._get_candidate(candidate, weights_idx, new_weight)

if score - eps > best_score:
flg_no_upd = False
best_score = score
# if w < self.max_nonzero_coef:
# w = 0

candidate = pre_candidate
best_weights = candidate

logger.info(
"Blending: iteration \x1b[1m{0}\x1b[0m: score = \x1b[1m{1}\x1b[0m, weights = \x1b[1m{2}\x1b[0m".format(
_, iter_best_score, iter_best_weights
)
f"Blending: iteration \x1b[1m{iteration_num}\x1b[0m: score = \x1b[1m{score}\x1b[0m, weights = \x1b[1m{candidate}\x1b[0m"
)

if flg_no_upd:
logger.info("Blending: no score update. Terminated\n")
logger.info("Blending: no improvements for score. Terminated.\n")
break
logger.info(f"Blending: best score = \x1b[1m{best_score}\x1b[0m, best weights = \x1b[1m{best_weights}\x1b[0m")

return candidate

Expand Down

0 comments on commit 8eb1a18

Please sign in to comment.