Skip to content

Commit 59fc796

Browse files
authored
Replace current error handling with "empty record" error handling (#361)
* Replace current error handling with "empty record" error handling
* Fix stupid web editor merge
* Improve warning messages
1 parent fc374e1 commit 59fc796

File tree

9 files changed

+330
-154
lines changed

9 files changed

+330
-154
lines changed

nannyml/drift/univariate/calculator.py

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,10 @@
3030
from __future__ import annotations
3131

3232
import warnings
33+
from logging import Logger
3334
from typing import Any, Dict, List, Optional, Union
3435

36+
import numpy as np
3537
import pandas as pd
3638
from pandas import MultiIndex
3739

@@ -344,7 +346,7 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:
344346
for column_name in self.continuous_column_names:
345347
for method in self._column_to_models_mapping[column_name]:
346348
try:
347-
for k, v in _calculate_for_column(chunk.data, column_name, method).items():
349+
for k, v in _calculate_for_column(chunk.data, column_name, method, self._logger).items():
348350
row[f'{column_name}_{method.column_name}_{k}'] = v
349351
except Exception as exc:
350352
self._logger.error(
@@ -356,7 +358,7 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:
356358
for column_name in self.categorical_column_names:
357359
for method in self._column_to_models_mapping[column_name]:
358360
try:
359-
for k, v in _calculate_for_column(chunk.data, column_name, method).items():
361+
for k, v in _calculate_for_column(chunk.data, column_name, method, self._logger).items():
360362
row[f'{column_name}_{method.column_name}_{k}'] = v
361363
except Exception as exc:
362364
self._logger.error(
@@ -400,14 +402,27 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:
400402
return self.result
401403

402404

403-
def _calculate_for_column(data: pd.DataFrame, column_name: str, method: Method) -> Dict[str, Any]:
405+
def _calculate_for_column(
406+
data: pd.DataFrame, column_name: str, method: Method, logger: Optional[Logger] = None
407+
) -> Dict[str, Any]:
404408
result = {}
405-
value = method.calculate(data[column_name])
406-
result['value'] = value
407-
result['upper_threshold'] = method.upper_threshold_value
408-
result['lower_threshold'] = method.lower_threshold_value
409-
result['alert'] = method.alert(value)
410-
return result
409+
try:
410+
value = method.calculate(data[column_name])
411+
result['value'] = value
412+
result['upper_threshold'] = method.upper_threshold_value
413+
result['lower_threshold'] = method.lower_threshold_value
414+
result['alert'] = method.alert(value)
415+
except Exception as exc:
416+
if logger:
417+
logger.error(
418+
f"an unexpected exception occurred during calculation of method '{method.display_name}': " f"{exc}"
419+
)
420+
result['value'] = np.NaN
421+
result['upper_threshold'] = method.upper_threshold_value
422+
result['lower_threshold'] = method.lower_threshold_value
423+
result['alert'] = np.NaN
424+
finally:
425+
return result
411426

412427

413428
def _create_multilevel_index(

nannyml/performance_calculation/calculator.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -354,14 +354,8 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:
354354
def _calculate_metrics_for_chunk(self, chunk: Chunk) -> Dict:
355355
chunk_records: Dict[str, Any] = {}
356356
for metric in self.metrics:
357-
try:
358-
chunk_record = metric.get_chunk_record(chunk.data)
359-
chunk_records.update(chunk_record)
360-
except Exception as exc:
361-
self._logger.error(
362-
f"an unexpected error occurred while calculating metric {metric.display_name}: {exc}"
363-
)
364-
continue
357+
chunk_record = metric.get_chunk_record(chunk.data)
358+
chunk_records.update(chunk_record)
365359
return chunk_records
366360

367361

nannyml/performance_calculation/metrics/base.py

Lines changed: 21 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -182,16 +182,27 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict:
182182

183183
chunk_record = {}
184184

185-
realized_value = self.calculate(chunk_data)
186-
sampling_error = self.sampling_error(chunk_data)
187-
188-
chunk_record[f'{column_name}_sampling_error'] = sampling_error
189-
chunk_record[f'{column_name}'] = realized_value
190-
chunk_record[f'{column_name}_upper_threshold'] = self.upper_threshold_value
191-
chunk_record[f'{column_name}_lower_threshold'] = self.lower_threshold_value
192-
chunk_record[f'{column_name}_alert'] = self.alert(realized_value)
193-
194-
return chunk_record
185+
try:
186+
realized_value = self.calculate(chunk_data)
187+
sampling_error = self.sampling_error(chunk_data)
188+
189+
chunk_record[f'{column_name}_sampling_error'] = sampling_error
190+
chunk_record[f'{column_name}'] = realized_value
191+
chunk_record[f'{column_name}_upper_threshold'] = self.upper_threshold_value
192+
chunk_record[f'{column_name}_lower_threshold'] = self.lower_threshold_value
193+
chunk_record[f'{column_name}_alert'] = self.alert(realized_value)
194+
except Exception as exc:
195+
if self._logger:
196+
self._logger.error(
197+
f"an unexpected exception occurred during calculation of method '{self.display_name}': " f"{exc}"
198+
)
199+
chunk_record[f'{column_name}_sampling_error'] = np.NaN
200+
chunk_record[f'{column_name}'] = np.NaN
201+
chunk_record[f'{column_name}_upper_threshold'] = self.upper_threshold_value
202+
chunk_record[f'{column_name}_lower_threshold'] = self.lower_threshold_value
203+
chunk_record[f'{column_name}_alert'] = np.NaN
204+
finally:
205+
return chunk_record
195206

196207
@property
197208
def display_name(self) -> str:

nannyml/performance_calculation/metrics/binary_classification.py

Lines changed: 69 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -99,8 +99,11 @@ def _calculate(self, data: pd.DataFrame):
9999
y_pred = data[self.y_pred_proba]
100100

101101
if y_true.nunique() <= 1:
102-
warnings.warn("Calculated ROC-AUC score contains NaN values.")
103-
return np.nan
102+
warnings.warn(
103+
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
104+
f"Returning NaN."
105+
)
106+
return np.NaN
104107
else:
105108
return roc_auc_score(y_true, y_pred)
106109

@@ -166,9 +169,18 @@ def _calculate(self, data: pd.DataFrame):
166169
y_true = data[self.y_true]
167170
y_pred = data[self.y_pred]
168171

169-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
170-
warnings.warn("Calculated F1-score contains NaN values.")
171-
return np.nan
172+
if y_true.nunique() <= 1:
173+
warnings.warn(
174+
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
175+
f"Returning NaN."
176+
)
177+
return np.NaN
178+
elif y_pred.nunique() <= 1:
179+
warnings.warn(
180+
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
181+
f"Returning NaN."
182+
)
183+
return np.NaN
172184
else:
173185
return f1_score(y_true, y_pred)
174186

@@ -233,9 +245,18 @@ def _calculate(self, data: pd.DataFrame):
233245
y_true = data[self.y_true]
234246
y_pred = data[self.y_pred]
235247

236-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
237-
warnings.warn("Calculated Precision score contains NaN values.")
238-
return np.nan
248+
if y_true.nunique() <= 1:
249+
warnings.warn(
250+
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
251+
f"Returning NaN."
252+
)
253+
return np.NaN
254+
elif y_pred.nunique() <= 1:
255+
warnings.warn(
256+
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
257+
f"Returning NaN."
258+
)
259+
return np.NaN
239260
else:
240261
return precision_score(y_true, y_pred)
241262

@@ -300,9 +321,18 @@ def _calculate(self, data: pd.DataFrame):
300321
y_true = data[self.y_true]
301322
y_pred = data[self.y_pred]
302323

303-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
304-
warnings.warn("Calculated Recall score contains NaN values.")
305-
return np.nan
324+
if y_true.nunique() <= 1:
325+
warnings.warn(
326+
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
327+
f"Returning NaN."
328+
)
329+
return np.NaN
330+
elif y_pred.nunique() <= 1:
331+
warnings.warn(
332+
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
333+
f"Returning NaN."
334+
)
335+
return np.NaN
306336
else:
307337
return recall_score(y_true, y_pred)
308338

@@ -367,9 +397,18 @@ def _calculate(self, data: pd.DataFrame):
367397
y_true = data[self.y_true]
368398
y_pred = data[self.y_pred]
369399

370-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
371-
warnings.warn("Calculated Specificity score contains NaN values.")
372-
return np.nan
400+
if y_true.nunique() <= 1:
401+
warnings.warn(
402+
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
403+
f"Returning NaN."
404+
)
405+
return np.NaN
406+
elif y_pred.nunique() <= 1:
407+
warnings.warn(
408+
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
409+
f"Returning NaN."
410+
)
411+
return np.NaN
373412
else:
374413
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
375414
return tn / (tn + fp)
@@ -435,9 +474,18 @@ def _calculate(self, data: pd.DataFrame):
435474
y_true = data[self.y_true]
436475
y_pred = data[self.y_pred]
437476

438-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
439-
warnings.warn("Calculated Accuracy score contains NaN values.")
440-
return np.nan
477+
if y_true.nunique() <= 1:
478+
warnings.warn(
479+
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
480+
f"Returning NaN."
481+
)
482+
return np.NaN
483+
elif y_pred.nunique() <= 1:
484+
warnings.warn(
485+
f"'{self.y_pred}' only contains a single class for chunk, cannot calculate {self.display_name}. "
486+
f"Returning NaN."
487+
)
488+
return np.NaN
441489
else:
442490
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
443491
return (tp + tn) / (tp + tn + fp + fn)
@@ -537,7 +585,7 @@ def _calculate(self, data: pd.DataFrame):
537585
y_pred = data[self.y_pred]
538586

539587
if y_true.shape[0] == 0:
540-
warnings.warn("Calculated Business Value contains NaN values.")
588+
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate business value. Returning NaN.")
541589
return np.NaN
542590

543591
tp_value = self.business_value_matrix[1, 1]
@@ -600,7 +648,7 @@ def __init__(
600648
('False Positive', 'false_positive'),
601649
('False Negative', 'false_negative'),
602650
],
603-
lower_threshold_limit=0
651+
lower_threshold_limit=0,
604652
)
605653

606654
self.upper_threshold_value_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None
@@ -793,8 +841,8 @@ def _calculate_false_negatives(self, data: pd.DataFrame) -> float:
793841
y_pred = data[self.y_pred]
794842

795843
if y_true.empty or y_pred.empty:
796-
warnings.warn("Calculated false_negatives contain NaN values.")
797-
return np.nan
844+
warnings.warn(f"'{self.y_true}' contains no data, cannot calculate {self.display_name}. Returning NaN.")
845+
return np.NaN
798846

799847
num_fn = np.sum(np.logical_and(np.logical_not(y_pred), y_true))
800848
num_tn = np.sum(np.logical_and(np.logical_not(y_pred), np.logical_not(y_true)))

nannyml/performance_calculation/metrics/multiclass_classification.py

Lines changed: 46 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -132,8 +132,11 @@ def _calculate(self, data: pd.DataFrame):
132132
)
133133

134134
if y_true.nunique() <= 1:
135-
warnings.warn("Calculated ROC-AUC score contains NaN values.")
136-
return np.nan
135+
warnings.warn(
136+
f"'{self.y_true}' only contains a single class for chunk, cannot calculate {self.display_name}. "
137+
"Returning NaN."
138+
)
139+
return np.NaN
137140
else:
138141
return roc_auc_score(y_true, y_pred_proba, multi_class='ovr', average='macro', labels=labels)
139142

@@ -219,9 +222,16 @@ def _calculate(self, data: pd.DataFrame):
219222
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
220223
)
221224

222-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
223-
warnings.warn("Calculated F1-score contains NaN values.")
224-
return np.nan
225+
if y_true.nunique() <= 1:
226+
warnings.warn(
227+
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
228+
)
229+
return np.NaN
230+
elif y_pred.nunique() <= 1:
231+
warnings.warn(
232+
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
233+
)
234+
return np.NaN
225235
else:
226236
return f1_score(y_true, y_pred, average='macro', labels=labels)
227237

@@ -307,9 +317,16 @@ def _calculate(self, data: pd.DataFrame):
307317
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
308318
)
309319

310-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
311-
warnings.warn("Calculated Precision score contains NaN values.")
312-
return np.nan
320+
if y_true.nunique() <= 1:
321+
warnings.warn(
322+
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
323+
)
324+
return np.NaN
325+
elif y_pred.nunique() <= 1:
326+
warnings.warn(
327+
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
328+
)
329+
return np.NaN
313330
else:
314331
return precision_score(y_true, y_pred, average='macro', labels=labels)
315332

@@ -395,9 +412,16 @@ def _calculate(self, data: pd.DataFrame):
395412
f"could not calculate metric {self.display_name}: " "prediction column contains no data"
396413
)
397414

398-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
399-
warnings.warn("Calculated Recall score contains NaN values.")
400-
return np.nan
415+
if y_true.nunique() <= 1:
416+
warnings.warn(
417+
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
418+
)
419+
return np.NaN
420+
elif y_pred.nunique() <= 1:
421+
warnings.warn(
422+
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
423+
)
424+
return np.NaN
401425
else:
402426
return recall_score(y_true, y_pred, average='macro', labels=labels)
403427

@@ -483,9 +507,16 @@ def _calculate(self, data: pd.DataFrame):
483507
f"could not calculate metric {self.display_name}: prediction column contains no data"
484508
)
485509

486-
if (y_true.nunique() <= 1) or (y_pred.nunique() <= 1):
487-
warnings.warn("Calculated Specificity score contains NaN values.")
488-
return np.nan
510+
if y_true.nunique() <= 1:
511+
warnings.warn(
512+
f"'{self.y_true}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
513+
)
514+
return np.NaN
515+
elif y_pred.nunique() <= 1:
516+
warnings.warn(
517+
f"'{self.y_pred}' only contains a single class, cannot calculate {self.display_name}. Returning NaN."
518+
)
519+
return np.NaN
489520
else:
490521
MCM = multilabel_confusion_matrix(y_true, y_pred, labels=labels)
491522
tn_sum = MCM[:, 0, 0]
@@ -596,7 +627,7 @@ def __init__(
596627
threshold=threshold,
597628
y_pred_proba=y_pred_proba,
598629
components=[("None", "none")],
599-
lower_threshold_limit=0
630+
lower_threshold_limit=0,
600631
)
601632

602633
self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix

0 commit comments

Comments
 (0)