FalseNegativeLab
diff --git a/mlscorecheck/aggregated/_dataset.py b/mlscorecheck/aggregated/_dataset.py
Lines changed: 1 addition & 1 deletion
diff --git a/mlscorecheck/aggregated/_experiment.py b/mlscorecheck/aggregated/_experiment.py
Lines changed: 7 additions & 1 deletion
diff --git a/mlscorecheck/aggregated/_fold.py b/mlscorecheck/aggregated/_fold.py
Lines changed: 4 additions & 2 deletions
diff --git a/mlscorecheck/aggregated/_folding_utils.py b/mlscorecheck/aggregated/_folding_utils.py
Lines changed: 2 additions & 1 deletion
diff --git a/mlscorecheck/aggregated/_generate_problems.py b/mlscorecheck/aggregated/_generate_problems.py
Lines changed: 11 additions & 5 deletions
diff --git a/mlscorecheck/check/binary/_check_1_dataset_unknown_folds_mos.py b/mlscorecheck/check/binary/_check_1_dataset_unknown_folds_mos.py
Lines changed: 1 addition & 1 deletion
diff --git a/mlscorecheck/check/bundles/retina/_diaretdb0.py b/mlscorecheck/check/bundles/retina/_diaretdb0.py
Lines changed: 11 additions & 6 deletions
diff --git a/mlscorecheck/check/bundles/retina/_diaretdb1.py b/mlscorecheck/check/bundles/retina/_diaretdb1.py
Lines changed: 3 additions & 1 deletion
diff --git a/mlscorecheck/check/bundles/retina/_drive.py b/mlscorecheck/check/bundles/retina/_drive.py
Lines changed: 3 additions & 1 deletion
diff --git a/mlscorecheck/check/regression/_check_regression.py b/mlscorecheck/check/regression/_check_regression.py
Lines changed: 4 additions & 4 deletions
@@ -60,7 +60,7 @@ def resolve_pn(self):
         """
         Resolves the ``p`` and ``n`` values from the name of the dataset
         """
-        if self.p is None:
+        if self.p is None and self.dataset_name is not None:
             dataset = dataset_statistics[self.dataset_name]
             self.p = dataset["p"]
             self.n = dataset["n"]
 
@@ -113,7 +113,13 @@ def calculate_scores(
         if self.aggregation == "som":
             self.scores = calculate_scores_for_lp(self.figures, score_subset=score_subset)
         elif self.aggregation == "mos":
-            self.scores = dict_mean([evaluation.scores for evaluation in self.evaluations])
+            self.scores = dict_mean(
+                [
+                    evaluation.scores
+                    for evaluation in self.evaluations
+                    if evaluation.scores is not None
+                ]
+            )
 
         if self.scores is None:
             return {}
 
@@ -7,6 +7,8 @@
 the linear programming problem
 """
 
+from typing import Any
+
 import pulp as pl
 
 from ..core import init_random_state, round_scores
@@ -34,8 +36,8 @@ def __init__(self, p: int, n: int, identifier: str | None = None):
         self.n = n
         self.identifier = random_identifier(5) if identifier is None else identifier
 
-        self.tp: int | None = None
-        self.tn: int | None = None
+        self.tp: Any = None  # Can be int | None or pl.LpVariable
+        self.tn: Any = None  # Can be int | None or pl.LpVariable
         self.scores: dict | None = None
 
         self.variable_names = {
 
@@ -3,6 +3,7 @@
 """
 
 import copy
+from typing import Any, cast
 
 import numpy as np
 from sklearn.model_selection import StratifiedKFold
@@ -194,7 +195,7 @@ def create_folds_multiclass(dataset: dict, folding: dict) -> list:
         raise ValueError("either specify the folds or the folding strategy")
 
     if "folds" in folding:
-        return folding["folds"]
+        return cast(list[Any], folding["folds"])
     if folding.get("strategy") == "stratified_sklearn":
         folds = multiclass_stratified_folds(dataset, folding.get("n_folds", 1))
     else:
 
@@ -307,7 +307,13 @@ def get_dataset_score_bounds(
     Returns:
         dict(str,tuple(float,float)): the score bounds
     """
-    score_bounds = dict_minmax([evaluation.scores for evaluation in experiment.evaluations])
+    score_bounds = dict_minmax(
+        [
+            evaluation.scores
+            for evaluation in experiment.evaluations
+            if evaluation.scores is not None
+        ]
+    )
     for key, value in score_bounds.items():
         score_bounds[key] = (
             max(0.0, value[0] - numerical_tolerance),
@@ -349,8 +355,8 @@ def generate_scores_for_testsets(
         testset["tn"] = random_state.randint(testset["n"] + 1)
 
     if aggregation == "mos":
-        scores = [calculate_scores_for_lp(testset) for testset in testsets]
-        scores = round_scores(dict_mean(scores), rounding_decimals=rounding_decimals)
+        scores_list = [calculate_scores_for_lp(testset) for testset in testsets]
+        scores = round_scores(dict_mean(scores_list), rounding_decimals=rounding_decimals)
         return {key: value for key, value in scores.items() if key in subset}
 
     mean_figures = dict_mean(testsets)
@@ -431,7 +437,7 @@ def generate_dataset_folding_multiclass(
     ]
 
     if aggregation == "mos":
-        scores = [
+        scores_list = [
             calculate_multiclass_scores(
                 sample,
                 average=average,
@@ -440,7 +446,7 @@ def generate_dataset_folding_multiclass(
             )
             for sample in samples
         ]
-        scores = round_scores(dict_mean(scores), rounding_decimals=rounding_decimals)
+        scores = round_scores(dict_mean(scores_list), rounding_decimals=rounding_decimals)
         return dataset, folding, scores
 
     # if aggregation == 'som':
 
@@ -37,7 +37,7 @@ def estimate_n_evaluations(
         )
     )
 
-    return count**n_repeats
+    return int(count**n_repeats)
 
 
 def check_1_dataset_unknown_folds_mos(
 
@@ -2,6 +2,8 @@
 Tests for the DIARETDB0 dataset
 """
 
+from typing import Any, cast
+
 from ....core import NUMERICAL_TOLERANCE
 from ....experiments import get_experiment
 from ...binary import check_n_testsets_mos_no_kfold, check_n_testsets_som_no_kfold
@@ -114,12 +116,15 @@ def check_diaretdb0_class_som(
     """
     testsets = _prepare_configuration_diaretdb0(subset, batch, class_name)
 
-    return check_n_testsets_som_no_kfold(
-        testsets=testsets,
-        scores=scores,
-        eps=eps,
-        numerical_tolerance=numerical_tolerance,
-        prefilter_by_pairs=True,
+    return cast(
+        dict[Any, Any],
+        check_n_testsets_som_no_kfold(
+            testsets=testsets,
+            scores=scores,
+            eps=eps,
+            numerical_tolerance=numerical_tolerance,
+            prefilter_by_pairs=True,
+        ),
     )
 
 
 
@@ -2,6 +2,8 @@
 This module implements tests for the DIARETDB1 dataset
 """
 
+from typing import Any, cast
+
 from ....core import NUMERICAL_TOLERANCE, logger
 from ....experiments import get_experiment
 from ...binary import (
@@ -103,7 +105,7 @@ def _prepare_configuration_diaretdb1(
     if only_valid:
         testsets = [tset for tset in testsets if tset["p"] > 0 and tset["n"] > 0]
 
-    return testsets if pixel_level else testset
+    return cast(list[Any], testsets if pixel_level else testset)
 
 
 def check_diaretdb1_class(
 
@@ -3,6 +3,8 @@
 segmentation drive dataset
 """
 
+from typing import Any
+
 from ....core import NUMERICAL_TOLERANCE
 from ....experiments import get_experiment
 from ...binary import (
@@ -239,7 +241,7 @@ def check_drive_vessel_image_assumption(
             The evidence for satisfying the consistency constraints.
 
     """
-    images = get_experiment("retina.drive")
+    images: Any = get_experiment("retina.drive")
     testset = [
         image
         for image in images[(annotator, assumption)]["train"]["images"]
 
@@ -34,7 +34,7 @@ def mean_average_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     Returns:
         float: the MAE score
     """
-    return np.mean(np.abs(y_true - y_pred))
+    return float(np.mean(np.abs(y_true - y_pred)))
 
 
 def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
@@ -48,7 +48,7 @@ def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     Returns:
         float: the MSE score
     """
-    return np.mean((y_true - y_pred) ** 2)
+    return float(np.mean((y_true - y_pred) ** 2))
 
 
 def root_mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
@@ -62,7 +62,7 @@ def root_mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     Returns:
         float: the RMSE score
     """
-    return np.sqrt(mean_squared_error(y_true, y_pred))
+    return float(np.sqrt(mean_squared_error(y_true, y_pred)))
 
 
 def r_squared(y_true: np.ndarray, y_pred: np.ndarray) -> float:
@@ -76,7 +76,7 @@ def r_squared(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     Returns:
         float: the R2 score
     """
-    return 1.0 - np.sum((y_true - y_pred) ** 2) / (np.var(y_true) * y_true.shape[0])
+    return float(1.0 - np.sum((y_true - y_pred) ** 2) / (np.var(y_true) * y_true.shape[0]))
 
 
 regression_scores = {
Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,7 @@ def estimate_n_evaluations(`
`37`	`37`	`)`
`38`	`38`	`)`
`39`	`39`
`40`		`- return count**n_repeats`
	`40`	`+ return int(count**n_repeats)`
`41`	`41`
`42`	`42`
`43`	`43`	`def check_1_dataset_unknown_folds_mos(`