From cd48ecbd8f238cbaa7375bf6be33c453a0d9f66f Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:08:11 +0530 Subject: [PATCH] Fix ZeroDivisionError and NaN propagation in reward model metrics The kendall_tau() and spearmanr() functions divide by bsize without checking if it is zero. This causes a ZeroDivisionError when all labels are padding (-100) or when the input batch has no valid label groups. Additionally, when a label group has fewer than 2 ranked items, scipy's kendalltau/spearmanr return NaN, which silently propagates through the accumulated score and corrupts the final metric value. Changes: - Skip label groups with fewer than 2 items (correlation is undefined for single-element arrays) - Only increment bsize for groups that produce a valid (non-NaN) correlation result - Return 0.0 instead of dividing by zero when bsize is 0 - Guard reward_accuracy() against empty score arrays, which would cause np.mean to return NaN on an empty array --- model/model_training/metrics.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/model/model_training/metrics.py b/model/model_training/metrics.py index 7a533c851f..801f6b393a 100644 --- a/model/model_training/metrics.py +++ b/model/model_training/metrics.py @@ -15,6 +15,15 @@ def reward_accuracy(eval_pred): logits_batch = b_logits[b_labels == i] pos_scores.append(logits_batch[0]) neg_scores.append(logits_batch[-1]) + + if len(pos_scores) == 0: + return { + "pos_score": 0.0, + "neg_score": 0.0, + "score_diff": 0.0, + "accuracy": 0.0, + } + pos_scores = np.array(pos_scores).reshape(-1, 1) neg_scores = np.array(neg_scores).reshape(-1, 1) @@ -43,10 +52,17 @@ def kendall_tau(eval_pred): # b_logits = b_logits[:truncated_logits] for i in np.unique(b_labels): logits_batch = b_logits[b_labels == i] + if logits_batch.size < 2: + continue pred_rank = np.argsort(logits_batch) true_rank = np.arange(logits_batch.size - 1, -1, -1) - tau += st.kendalltau(pred_rank, true_rank)[0] - bsize += np.unique(b_labels).size + result = st.kendalltau(pred_rank, true_rank)[0] + if not np.isnan(result): + tau += result + bsize += 1 + + if bsize == 0: + return {"kendalltau": 0.0} return {"kendalltau": tau / bsize} @@ -61,10 +77,17 @@ def spearmanr(eval_pred): b_logits = b_logits[b_logits != -100] for i in np.unique(b_labels): logits_batch = b_logits[b_labels == i] + if logits_batch.size < 2: + continue pred_rank = np.argsort(logits_batch) true_rank = np.arange(logits_batch.size - 1, -1, -1) - score += st.spearmanr(pred_rank, true_rank).statistic - bsize += np.unique(b_labels).size + result = st.spearmanr(pred_rank, true_rank).statistic + if not np.isnan(result): + score += result + bsize += 1 + + if bsize == 0: + return {"spearmanr": 0.0} return {"spearmanr": score / bsize}