Skip to content

Commit

Permalink
Improve warning messages
Browse files (browse the repository at this point in the history)
  • Loading branch information
nnansters committed Feb 12, 2024
1 parent cce5029 commit 4343ee5
Show file tree
Hide file tree
Showing 4 changed files with 207 additions and 63 deletions.
90 changes: 69 additions & 21 deletions nannyml/performance_calculation/metrics/binary_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,11 @@ def _calculate(self, data: pd.DataFrame):
y_pred = data[self.y_pred_proba]

if y_true.nunique() <= 1:
warnings.warn("Calculated ROC-AUC score contains NaN values.")
return np.nan
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
else:
return roc_auc_score(y_true, y_pred)

Expand Down Expand Up @@ -166,9 +169,18 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated F1-score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 179 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L179

Added line #L179 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 183 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L183

Added line #L183 was not covered by tests
else:
return f1_score(y_true, y_pred)

Expand Down Expand Up @@ -233,9 +245,18 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Precision score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 255 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L255

Added line #L255 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 259 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L259

Added line #L259 was not covered by tests
else:
return precision_score(y_true, y_pred)

Expand Down Expand Up @@ -300,9 +321,18 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Recall score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 331 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L331

Added line #L331 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 335 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L335

Added line #L335 was not covered by tests
else:
return recall_score(y_true, y_pred)

Expand Down Expand Up @@ -367,9 +397,18 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Specificity score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 407 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L407

Added line #L407 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 411 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L411

Added line #L411 was not covered by tests
else:
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
return tn / (tn + fp)
Expand Down Expand Up @@ -435,9 +474,18 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Accuracy score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 484 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L484

Added line #L484 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 488 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L488

Added line #L488 was not covered by tests
else:
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
return (tp + tn) / (tp + tn + fp + fn)
Expand Down Expand Up @@ -537,7 +585,7 @@ def _calculate(self, data: pd.DataFrame):
y_pred = data[self.y_pred]

if y_true.shape[0] == 0:
warnings.warn("Calculated Business Value contains NaN values.")
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate business value. Returning NaN.")
return np.NaN

tp_value = self.business_value_matrix[1, 1]
Expand Down Expand Up @@ -600,7 +648,7 @@ def __init__(
('False Positive', 'false_positive'),
('False Negative', 'false_negative'),
],
lower_threshold_limit=0
lower_threshold_limit=0,
)

self.upper_threshold_value_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None
Expand Down Expand Up @@ -793,8 +841,8 @@ def _calculate_false_negatives(self, data: pd.DataFrame) -> float:
y_pred = data[self.y_pred]

if y_true.empty or y_pred.empty:
warnings.warn("Calculated false_negatives contain NaN values.")
return np.nan
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

num_fn = np.sum(np.logical_and(np.logical_not(y_pred), y_true))
num_tn = np.sum(np.logical_and(np.logical_not(y_pred), np.logical_not(y_true)))
Expand Down
nannyml/performance_calculation/metrics/multiclass_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,11 @@ def _calculate(self, data: pd.DataFrame):
)

if y_true.nunique() <= 1:
warnings.warn("Calculated ROC-AUC score contains NaN values.")
return np.nan
warnings.warn(

Check warning on line 135 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L135

Added line #L135 was not covered by tests
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
"Returning NaN."
)
return np.NaN

Check warning on line 139 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L139

Added line #L139 was not covered by tests
else:
return roc_auc_score(y_true, y_pred_proba, multi_class='ovr', average='macro', labels=labels)

Expand Down Expand Up @@ -219,9 +222,16 @@ def _calculate(self, data: pd.DataFrame):
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
)

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated F1-score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(

Check warning on line 226 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L226

Added line #L226 was not covered by tests
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 229 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L229

Added line #L229 was not covered by tests
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 231 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L231

Added line #L231 was not covered by tests
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 234 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L234

Added line #L234 was not covered by tests
else:
return f1_score(y_true, y_pred, average='macro', labels=labels)

Expand Down Expand Up @@ -307,9 +317,16 @@ def _calculate(self, data: pd.DataFrame):
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
)

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Precision score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(

Check warning on line 321 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L321

Added line #L321 was not covered by tests
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 324 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L324

Added line #L324 was not covered by tests
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 326 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L326

Added line #L326 was not covered by tests
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 329 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L329

Added line #L329 was not covered by tests
else:
return precision_score(y_true, y_pred, average='macro', labels=labels)

Expand Down Expand Up @@ -395,9 +412,16 @@ def _calculate(self, data: pd.DataFrame):
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
)

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Recall score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(

Check warning on line 416 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L416

Added line #L416 was not covered by tests
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 419 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L419

Added line #L419 was not covered by tests
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 421 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L421

Added line #L421 was not covered by tests
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 424 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L424

Added line #L424 was not covered by tests
else:
return recall_score(y_true, y_pred, average='macro', labels=labels)

Expand Down Expand Up @@ -483,9 +507,16 @@ def _calculate(self, data: pd.DataFrame):
f"could not calculate metric {self.display_name}: prediction column contains no data"
)

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Specificity score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(

Check warning on line 511 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L511

Added line #L511 was not covered by tests
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 514 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L514

Added line #L514 was not covered by tests
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 516 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L516

Added line #L516 was not covered by tests
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 519 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L519

Added line #L519 was not covered by tests
else:
MCM = multilabel_confusion_matrix(y_true, y_pred, labels=labels)
tn_sum = MCM[:, 0, 0]
Expand Down Expand Up @@ -596,7 +627,7 @@ def __init__(
threshold=threshold,
y_pred_proba=y_pred_proba,
components=[("None", "none")],
lower_threshold_limit=0
lower_threshold_limit=0,
)

self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix
Expand Down
49 changes: 37 additions & 12 deletions nannyml/performance_calculation/metrics/regression.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Author: Niels Nuyttens <niels@nannyml.com>
#
# License: Apache Software License 2.0
import warnings
from typing import Optional, Tuple

import numpy as np
Expand Down Expand Up @@ -81,8 +82,12 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if y_true.empty or y_pred.empty:
return np.nan
if y_true.empty:
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 87 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L86-L87

Added lines #L86 - L87 were not covered by tests
elif y_pred.empty:
warnings.warn(f"'{self.y_pred}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 90 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L89-L90

Added lines #L89 - L90 were not covered by tests

return mean_absolute_error(y_true, y_pred)

Expand Down Expand Up @@ -139,8 +144,12 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if y_true.empty or y_pred.empty:
return np.nan
if y_true.empty:
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 149 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L148-L149

Added lines #L148 - L149 were not covered by tests
elif y_pred.empty:
warnings.warn(f"'{self.y_pred}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 152 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L151-L152

Added lines #L151 - L152 were not covered by tests

return mean_absolute_percentage_error(y_true, y_pred)

Expand Down Expand Up @@ -197,8 +206,12 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if y_true.empty or y_pred.empty:
return np.nan
if y_true.empty:
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 211 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L210-L211

Added lines #L210 - L211 were not covered by tests
elif y_pred.empty:
warnings.warn(f"'{self.y_pred}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 214 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L213-L214

Added lines #L213 - L214 were not covered by tests

return mean_squared_error(y_true, y_pred)

Expand Down Expand Up @@ -255,8 +268,12 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if y_true.empty or y_pred.empty:
return np.nan
if y_true.empty:
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 273 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L272-L273

Added lines #L272 - L273 were not covered by tests
elif y_pred.empty:
warnings.warn(f"'{self.y_pred}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 276 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L275-L276

Added lines #L275 - L276 were not covered by tests

# TODO: include option to drop negative values as well?

Expand Down Expand Up @@ -318,8 +335,12 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if y_true.empty or y_pred.empty:
return np.nan
if y_true.empty:
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 340 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L339-L340

Added lines #L339 - L340 were not covered by tests
elif y_pred.empty:
warnings.warn(f"'{self.y_pred}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 343 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L342-L343

Added lines #L342 - L343 were not covered by tests

return mean_squared_error(y_true, y_pred, squared=False)

Expand Down Expand Up @@ -376,8 +397,12 @@ def _calculate(self, data: pd.DataFrame):
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if y_true.empty or y_pred.empty:
return np.nan
if y_true.empty:
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 402 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L401-L402

Added lines #L401 - L402 were not covered by tests
elif y_pred.empty:
warnings.warn(f"'{self.y_pred}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

Check warning on line 405 in nannyml/performance_calculation/metrics/regression.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/regression.py#L404-L405

Added lines #L404 - L405 were not covered by tests

# TODO: include option to drop negative values as well?

Expand Down
Loading

0 comments on commit 4343ee5

Please sign in to comment.