From cd48ecbd8f238cbaa7375bf6be33c453a0d9f66f Mon Sep 17 00:00:00 2001
From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com>
Date: Thu, 12 Feb 2026 00:08:11 +0530
Subject: [PATCH] Fix ZeroDivisionError and NaN propagation in reward model
 metrics

The kendall_tau() and spearmanr() functions divide by bsize without
checking if it is zero. This causes a ZeroDivisionError when all
labels are padding (-100) or when the input batch has no valid
label groups.

Additionally, when a label group has fewer than 2 ranked items,
scipy's kendalltau/spearmanr return NaN, which silently propagates
through the accumulated score and corrupts the final metric value.

Changes:
- Skip label groups with fewer than 2 items (correlation is
  undefined for single-element arrays)
- Only increment bsize for groups that produce a valid (non-NaN)
  correlation result
- Return 0.0 instead of dividing by zero when bsize is 0, i.e. when no
  label group produced a valid correlation
- Guard reward_accuracy() against empty score lists: np.mean of an
  empty array is NaN, so return zeroed metrics instead
---
 model/model_training/metrics.py | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/model/model_training/metrics.py b/model/model_training/metrics.py
index 7a533c851f..801f6b393a 100644
--- a/model/model_training/metrics.py
+++ b/model/model_training/metrics.py
@@ -15,6 +15,15 @@ def reward_accuracy(eval_pred):
             logits_batch = b_logits[b_labels == i]
             pos_scores.append(logits_batch[0])
             neg_scores.append(logits_batch[-1])
+
+    if len(pos_scores) == 0:
+        return {
+            "pos_score": 0.0,
+            "neg_score": 0.0,
+            "score_diff": 0.0,
+            "accuracy": 0.0,
+        }
+
     pos_scores = np.array(pos_scores).reshape(-1, 1)
     neg_scores = np.array(neg_scores).reshape(-1, 1)
 
@@ -43,10 +52,17 @@ def kendall_tau(eval_pred):
         # b_logits = b_logits[:truncated_logits]
         for i in np.unique(b_labels):
             logits_batch = b_logits[b_labels == i]
+            if logits_batch.size < 2:
+                continue
             pred_rank = np.argsort(logits_batch)
             true_rank = np.arange(logits_batch.size - 1, -1, -1)
-            tau += st.kendalltau(pred_rank, true_rank)[0]
-        bsize += np.unique(b_labels).size
+            result = st.kendalltau(pred_rank, true_rank)[0]
+            if not np.isnan(result):
+                tau += result
+                bsize += 1
+
+    if bsize == 0:
+        return {"kendalltau": 0.0}
 
     return {"kendalltau": tau / bsize}
 
@@ -61,10 +77,17 @@ def spearmanr(eval_pred):
         b_logits = b_logits[b_logits != -100]
         for i in np.unique(b_labels):
             logits_batch = b_logits[b_labels == i]
+            if logits_batch.size < 2:
+                continue
             pred_rank = np.argsort(logits_batch)
             true_rank = np.arange(logits_batch.size - 1, -1, -1)
-            score += st.spearmanr(pred_rank, true_rank).statistic
-        bsize += np.unique(b_labels).size
+            result = st.spearmanr(pred_rank, true_rank).statistic
+            if not np.isnan(result):
+                score += result
+                bsize += 1
+
+    if bsize == 0:
+        return {"spearmanr": 0.0}
 
     return {"spearmanr": score / bsize}