From 494984b3cce0b94d2649646790a80834f554ed62 Mon Sep 17 00:00:00 2001 From: Rowan Stein Date: Fri, 26 Dec 2025 13:32:36 +0000 Subject: [PATCH 1/2] ROC: use dtype-aware sentinel threshold for float scores via np.nextafter; keep +1 for integers. Add non-regression tests for probability and integer cases. Ref #53 --- sklearn/metrics/_ranking.py | 9 ++- sklearn/metrics/tests/test_ranking.py | 86 +++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 0cdead9233898..28c89ef6357e4 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -1019,7 +1019,8 @@ def roc_curve( thresholds : ndarray of shape = (n_thresholds,) Decreasing thresholds on the decision function used to compute fpr and tpr. `thresholds[0]` represents no instances being predicted - and is arbitrarily set to `max(y_score) + 1`. + and is set to the next representable value above ``max(y_score)`` for + floating scores (or ``max(y_score) + 1`` otherwise). See Also -------- @@ -1083,7 +1084,11 @@ def roc_curve( # to make sure that the curve starts at (0, 0) tps = np.r_[0, tps] fps = np.r_[0, fps] - thresholds = np.r_[thresholds[0] + 1, thresholds] + if np.issubdtype(thresholds.dtype, np.floating): + prepend_thresh = np.nextafter(thresholds[0], np.inf, dtype=thresholds.dtype) + else: + prepend_thresh = thresholds[0] + 1 + thresholds = np.r_[prepend_thresh, thresholds] if fps[-1] <= 0: warnings.warn( diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index f38f118c38c0a..1478c2f810706 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -216,6 +216,49 @@ def test_roc_curve_end_points(): assert fpr.shape == thr.shape +def test_roc_curve_thresholds_probabilities_below_one(): + # Probabilistic scores below 1.0 should prepend the smallest greater float + y_true = np.array([0, 0, 1, 1]) + y_score = np.array([0.75, 0.2, 0.33, 0.9], dtype=np.float64) + + fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) + + max_score = y_score.max() + expected_prepend = np.nextafter(max_score, np.inf) + + assert thresholds[0] == pytest.approx(expected_prepend) + assert thresholds[0] > max_score + assert np.all(np.diff(thresholds) <= 0) + assert thresholds.shape == fpr.shape == tpr.shape + + +def test_roc_curve_thresholds_probability_one(): + # The prepend threshold for a max score of 1.0 should be nextafter(1.0) + y_true = np.array([0, 1, 0, 1]) + y_score = np.array([1.0, 0.6, 0.4, 0.8], dtype=np.float64) + + fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) + + expected_prepend = np.nextafter(1.0, np.inf, dtype=thresholds.dtype) + + assert thresholds[0] == pytest.approx(expected_prepend) + assert thresholds[1] == pytest.approx(1.0) + assert np.all(np.diff(thresholds) <= 0) + assert thresholds.shape == fpr.shape == tpr.shape + + +def test_roc_curve_thresholds_integer_scores(): + # Non-probability integer scores should continue to prepend max(score) + 1 + y_true = np.array([0, 1, 0, 1]) + y_score = np.array([2, 3, 1, 0], dtype=np.int64) + + fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) + + assert thresholds[0] == y_score.max() + 1 + assert np.all(np.diff(thresholds) <= 0) + assert thresholds.shape == fpr.shape == tpr.shape + + def test_roc_returns_consistency(): # Test whether the returned threshold matches up with tpr # make small toy dataset @@ -2199,3 +2242,46 @@ def test_ranking_metric_pos_label_types(metric, classes): assert not np.isnan(metric_1).any() assert not np.isnan(metric_2).any() assert not np.isnan(thresholds).any() + + +def test_roc_curve_thresholds_probabilities_below_one(): + # Probabilistic scores below 1.0 should prepend the smallest greater float + y_true = np.array([0, 0, 1, 1]) + y_score = np.array([0.75, 0.2, 0.33, 0.9], dtype=np.float64) + + fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) + + max_score = y_score.max() + expected_prepend = np.nextafter(max_score, np.inf) + + assert thresholds[0] == pytest.approx(expected_prepend) + assert thresholds[0] > max_score + assert np.all(np.diff(thresholds) <= 0) + assert thresholds.shape == fpr.shape == tpr.shape + + +def test_roc_curve_thresholds_probability_one(): + # The prepend threshold for a max score of 1.0 should be nextafter(1.0) + y_true = np.array([0, 1, 0, 1]) + y_score = np.array([1.0, 0.6, 0.4, 0.8], dtype=np.float64) + + fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) + + expected_prepend = np.nextafter(1.0, np.inf, dtype=thresholds.dtype) + + assert thresholds[0] == pytest.approx(expected_prepend) + assert thresholds[1] == pytest.approx(1.0) + assert np.all(np.diff(thresholds) <= 0) + assert thresholds.shape == fpr.shape == tpr.shape + + +def test_roc_curve_thresholds_integer_scores(): + # Non-probability integer scores should continue to prepend max(score) + 1 + y_true = np.array([0, 1, 0, 1]) + y_score = np.array([2, 3, 1, 0], dtype=np.int64) + + fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) + + assert thresholds[0] == y_score.max() + 1 + assert np.all(np.diff(thresholds) <= 0) + assert thresholds.shape == fpr.shape == tpr.shape From cbad0ac4e99573e0e116928d8017284171df1348 Mon Sep 17 00:00:00 2001 From: Casey Brooks Date: Fri, 26 Dec 2025 13:38:15 +0000 Subject: [PATCH 2/2] test(metrics): dedupe roc threshold regressions --- sklearn/metrics/tests/test_ranking.py | 42 --------------------------- 1 file changed, 42 deletions(-) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 1478c2f810706..89cb8b12ae3d5 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -216,48 +216,6 @@ def test_roc_curve_end_points(): assert fpr.shape == thr.shape -def test_roc_curve_thresholds_probabilities_below_one(): - # Probabilistic scores below 1.0 should prepend the smallest greater float - y_true = np.array([0, 0, 1, 1]) - y_score = np.array([0.75, 0.2, 0.33, 0.9], dtype=np.float64) - - fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) - - max_score = y_score.max() - expected_prepend = np.nextafter(max_score, np.inf) - - assert thresholds[0] == pytest.approx(expected_prepend) - assert thresholds[0] > max_score - assert np.all(np.diff(thresholds) <= 0) - assert thresholds.shape == fpr.shape == tpr.shape - - -def test_roc_curve_thresholds_probability_one(): - # The prepend threshold for a max score of 1.0 should be nextafter(1.0) - y_true = np.array([0, 1, 0, 1]) - y_score = np.array([1.0, 0.6, 0.4, 0.8], dtype=np.float64) - - fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) - - expected_prepend = np.nextafter(1.0, np.inf, dtype=thresholds.dtype) - - assert thresholds[0] == pytest.approx(expected_prepend) - assert thresholds[1] == pytest.approx(1.0) - assert np.all(np.diff(thresholds) <= 0) - assert thresholds.shape == fpr.shape == tpr.shape - - -def test_roc_curve_thresholds_integer_scores(): - # Non-probability integer scores should continue to prepend max(score) + 1 - y_true = np.array([0, 1, 0, 1]) - y_score = np.array([2, 3, 1, 0], dtype=np.int64) - - fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False) - - assert thresholds[0] == y_score.max() + 1 - assert np.all(np.diff(thresholds) <= 0) - assert thresholds.shape == fpr.shape == tpr.shape - def test_roc_returns_consistency(): # Test whether the returned threshold matches up with tpr