Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace current error handling with "empty record" error handling #361

Merged
4 commits merged on Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions nannyml/drift/univariate/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
from __future__ import annotations

import warnings
from logging import Logger
from typing import Any, Dict, List, Optional, Union

import numpy as np
import pandas as pd
from pandas import MultiIndex

Expand Down Expand Up @@ -344,7 +346,7 @@
for column_name in self.continuous_column_names:
for method in self._column_to_models_mapping[column_name]:
try:
for k, v in _calculate_for_column(chunk.data, column_name, method).items():
for k, v in _calculate_for_column(chunk.data, column_name, method, self._logger).items():
row[f'{column_name}_{method.column_name}_{k}'] = v
except Exception as exc:
self._logger.error(
Expand All @@ -356,7 +358,7 @@
for column_name in self.categorical_column_names:
for method in self._column_to_models_mapping[column_name]:
try:
for k, v in _calculate_for_column(chunk.data, column_name, method).items():
for k, v in _calculate_for_column(chunk.data, column_name, method, self._logger).items():
row[f'{column_name}_{method.column_name}_{k}'] = v
except Exception as exc:
self._logger.error(
Expand Down Expand Up @@ -400,14 +402,27 @@
return self.result


def _calculate_for_column(data: pd.DataFrame, column_name: str, method: Method) -> Dict[str, Any]:
def _calculate_for_column(
data: pd.DataFrame, column_name: str, method: Method, logger: Optional[Logger] = None
) -> Dict[str, Any]:
result = {}
value = method.calculate(data[column_name])
result['value'] = value
result['upper_threshold'] = method.upper_threshold_value
result['lower_threshold'] = method.lower_threshold_value
result['alert'] = method.alert(value)
return result
try:
value = method.calculate(data[column_name])
result['value'] = value
result['upper_threshold'] = method.upper_threshold_value
result['lower_threshold'] = method.lower_threshold_value
result['alert'] = method.alert(value)
except Exception as exc:

Check warning on line 415 in nannyml/drift/univariate/calculator.py

View check run for this annotation

Codecov / codecov/patch

nannyml/drift/univariate/calculator.py#L415

Added line #L415 was not covered by tests
if logger:
logger.error(

Check warning on line 417 in nannyml/drift/univariate/calculator.py

View check run for this annotation

Codecov / codecov/patch

nannyml/drift/univariate/calculator.py#L417

Added line #L417 was not covered by tests
f"an unexpected exception occurred during calculation of method '{method.display_name}': " f"{exc}"
)
result['value'] = np.NaN
result['upper_threshold'] = method.upper_threshold_value
result['lower_threshold'] = method.lower_threshold_value
result['alert'] = np.NaN

Check warning on line 423 in nannyml/drift/univariate/calculator.py

View check run for this annotation

Codecov / codecov/patch

nannyml/drift/univariate/calculator.py#L420-L423

Added lines #L420 - L423 were not covered by tests
finally:
return result


def _create_multilevel_index(
Expand Down
10 changes: 2 additions & 8 deletions nannyml/performance_calculation/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,14 +354,8 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:
def _calculate_metrics_for_chunk(self, chunk: Chunk) -> Dict:
chunk_records: Dict[str, Any] = {}
for metric in self.metrics:
try:
chunk_record = metric.get_chunk_record(chunk.data)
chunk_records.update(chunk_record)
except Exception as exc:
self._logger.error(
f"an unexpected error occurred while calculating metric {metric.display_name}: {exc}"
)
continue
chunk_record = metric.get_chunk_record(chunk.data)
chunk_records.update(chunk_record)
return chunk_records


Expand Down
31 changes: 21 additions & 10 deletions nannyml/performance_calculation/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,16 +182,27 @@

chunk_record = {}

realized_value = self.calculate(chunk_data)
sampling_error = self.sampling_error(chunk_data)

chunk_record[f'{column_name}_sampling_error'] = sampling_error
chunk_record[f'{column_name}'] = realized_value
chunk_record[f'{column_name}_upper_threshold'] = self.upper_threshold_value
chunk_record[f'{column_name}_lower_threshold'] = self.lower_threshold_value
chunk_record[f'{column_name}_alert'] = self.alert(realized_value)

return chunk_record
try:
realized_value = self.calculate(chunk_data)
sampling_error = self.sampling_error(chunk_data)

chunk_record[f'{column_name}_sampling_error'] = sampling_error
chunk_record[f'{column_name}'] = realized_value
chunk_record[f'{column_name}_upper_threshold'] = self.upper_threshold_value
chunk_record[f'{column_name}_lower_threshold'] = self.lower_threshold_value
chunk_record[f'{column_name}_alert'] = self.alert(realized_value)
except Exception as exc:

Check warning on line 194 in nannyml/performance_calculation/metrics/base.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/base.py#L194

Added line #L194 was not covered by tests
if self._logger:
self._logger.error(

Check warning on line 196 in nannyml/performance_calculation/metrics/base.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/base.py#L196

Added line #L196 was not covered by tests
f"an unexpected exception occurred during calculation of method '{self.display_name}': " f"{exc}"
)
chunk_record[f'{column_name}_sampling_error'] = np.NaN
chunk_record[f'{column_name}'] = np.NaN
chunk_record[f'{column_name}_upper_threshold'] = self.upper_threshold_value
chunk_record[f'{column_name}_lower_threshold'] = self.lower_threshold_value
chunk_record[f'{column_name}_alert'] = np.NaN

Check warning on line 203 in nannyml/performance_calculation/metrics/base.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/base.py#L199-L203

Added lines #L199 - L203 were not covered by tests
finally:
return chunk_record

@property
def display_name(self) -> str:
Expand Down
90 changes: 69 additions & 21 deletions nannyml/performance_calculation/metrics/binary_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,11 @@
y_pred = data[self.y_pred_proba]

if y_true.nunique() <= 1:
warnings.warn("Calculated ROC-AUC score contains NaN values.")
return np.nan
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
else:
return roc_auc_score(y_true, y_pred)

Expand Down Expand Up @@ -166,9 +169,18 @@
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated F1-score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 179 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L179

Added line #L179 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 183 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L183

Added line #L183 was not covered by tests
else:
return f1_score(y_true, y_pred)

Expand Down Expand Up @@ -233,9 +245,18 @@
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Precision score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 255 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L255

Added line #L255 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 259 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L259

Added line #L259 was not covered by tests
else:
return precision_score(y_true, y_pred)

Expand Down Expand Up @@ -300,9 +321,18 @@
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Recall score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 331 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L331

Added line #L331 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 335 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L335

Added line #L335 was not covered by tests
else:
return recall_score(y_true, y_pred)

Expand Down Expand Up @@ -367,9 +397,18 @@
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Specificity score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 407 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L407

Added line #L407 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 411 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L411

Added line #L411 was not covered by tests
else:
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
return tn / (tn + fp)
Expand Down Expand Up @@ -435,9 +474,18 @@
y_true = data[self.y_true]
y_pred = data[self.y_pred]

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Accuracy score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 484 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L484

Added line #L484 was not covered by tests
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN

Check warning on line 488 in nannyml/performance_calculation/metrics/binary_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/binary_classification.py#L488

Added line #L488 was not covered by tests
else:
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
return (tp + tn) / (tp + tn + fp + fn)
Expand Down Expand Up @@ -537,7 +585,7 @@
y_pred = data[self.y_pred]

if y_true.shape[0] == 0:
warnings.warn("Calculated Business Value contains NaN values.")
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate business value. Returning NaN.")
return np.NaN

tp_value = self.business_value_matrix[1, 1]
Expand Down Expand Up @@ -600,7 +648,7 @@
('False Positive', 'false_positive'),
('False Negative', 'false_negative'),
],
lower_threshold_limit=0
lower_threshold_limit=0,
)

self.upper_threshold_value_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None
Expand Down Expand Up @@ -793,8 +841,8 @@
y_pred = data[self.y_pred]

if y_true.empty or y_pred.empty:
warnings.warn("Calculated false_negatives contain NaN values.")
return np.nan
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
return np.NaN

num_fn = np.sum(np.logical_and(np.logical_not(y_pred), y_true))
num_tn = np.sum(np.logical_and(np.logical_not(y_pred), np.logical_not(y_true)))
Expand Down
nannyml/performance_calculation/metrics/multiclass_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,11 @@
)

if y_true.nunique() <= 1:
warnings.warn("Calculated ROC-AUC score contains NaN values.")
return np.nan
warnings.warn(

Check warning on line 135 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L135

Added line #L135 was not covered by tests
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
"Returning NaN."
)
return np.NaN

Check warning on line 139 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L139

Added line #L139 was not covered by tests
else:
return roc_auc_score(y_true, y_pred_proba, multi_class='ovr', average='macro', labels=labels)

Expand Down Expand Up @@ -219,9 +222,16 @@
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
)

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated F1-score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(

Check warning on line 226 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L226

Added line #L226 was not covered by tests
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 229 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L229

Added line #L229 was not covered by tests
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 231 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L231

Added line #L231 was not covered by tests
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 234 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L234

Added line #L234 was not covered by tests
else:
return f1_score(y_true, y_pred, average='macro', labels=labels)

Expand Down Expand Up @@ -307,9 +317,16 @@
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
)

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Precision score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(

Check warning on line 321 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L321

Added line #L321 was not covered by tests
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 324 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L324

Added line #L324 was not covered by tests
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 326 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L326

Added line #L326 was not covered by tests
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 329 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L329

Added line #L329 was not covered by tests
else:
return precision_score(y_true, y_pred, average='macro', labels=labels)

Expand Down Expand Up @@ -395,9 +412,16 @@
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
)

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Recall score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(

Check warning on line 416 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L416

Added line #L416 was not covered by tests
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 419 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L419

Added line #L419 was not covered by tests
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 421 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L421

Added line #L421 was not covered by tests
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 424 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L424

Added line #L424 was not covered by tests
else:
return recall_score(y_true, y_pred, average='macro', labels=labels)

Expand Down Expand Up @@ -483,9 +507,16 @@
f"could not calculate metric {self.display_name}: prediction column contains no data"
)

if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
warnings.warn("Calculated Specificity score contains NaN values.")
return np.nan
if y_true.nunique() <= 1:
warnings.warn(

Check warning on line 511 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L511

Added line #L511 was not covered by tests
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 514 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L514

Added line #L514 was not covered by tests
elif y_pred.nunique() <= 1:
warnings.warn(

Check warning on line 516 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L516

Added line #L516 was not covered by tests
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
)
return np.NaN

Check warning on line 519 in nannyml/performance_calculation/metrics/multiclass_classification.py

View check run for this annotation

Codecov / codecov/patch

nannyml/performance_calculation/metrics/multiclass_classification.py#L519

Added line #L519 was not covered by tests
else:
MCM = multilabel_confusion_matrix(y_true, y_pred, labels=labels)
tn_sum = MCM[:, 0, 0]
Expand Down Expand Up @@ -596,7 +627,7 @@
threshold=threshold,
y_pred_proba=y_pred_proba,
components=[("None", "none")],
lower_threshold_limit=0
lower_threshold_limit=0,
)

self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix
Expand Down
Loading
Loading