Average precision and AUROC update #374

Merged · 8 commits · Mar 8, 2024
Changes from 1 commit
add CBPE BC AP implementation
nikml committed Mar 6, 2024
commit 28014801c7504852ac6a66ef0d8f2e77a7cfc6e3
@@ -185,7 +185,7 @@ def _calculate(self, data: pd.DataFrame):
data = _remove_nans(data, (self.y_true, self.y_pred))

y_true = data[self.y_true]
-y_pred = data[self.y_pred_proba]
+y_pred_proba = data[self.y_pred_proba]

if 1 not in y_true.unique():
warnings.warn(
@@ -194,7 +194,7 @@ def _calculate(self, data: pd.DataFrame):
)
return np.NaN
else:
-return average_precision_score(y_true, y_pred)
+return average_precision_score(y_true, y_pred_proba)

def _sampling_error(self, data: pd.DataFrame) -> float:
return ap_sampling_error(self._sampling_error_components, data)
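The rename matters because average precision is a ranking metric: `average_precision_score` must be fed the probability scores, and the old `y_pred` name invited confusion with thresholded predictions. A minimal sketch, with hypothetical data, of why the distinction matters:

import numpy as np
from sklearn.metrics import average_precision_score

# Hypothetical data: AP ranks samples by score, so continuous
# probabilities and thresholded 0/1 labels give different results.
y_true = np.array([0, 0, 1, 1, 1])
y_pred_proba = np.array([0.1, 0.4, 0.35, 0.8, 0.9])
y_pred = (y_pred_proba >= 0.5).astype(int)

average_precision_score(y_true, y_pred_proba)  # uses the full ranking of scores
average_precision_score(y_true, y_pred)        # collapses to a two-level ranking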
@@ -24,6 +24,7 @@
SUPPORTED_METRIC_VALUES = [
'roc_auc',
'f1',
+'average_precision',
'precision',
'recall',
'specificity',
1 change: 1 addition & 0 deletions nannyml/performance_estimation/confidence_based/cbpe.py
@@ -49,6 +49,7 @@
'accuracy': StandardDeviationThreshold(),
'confusion_matrix': StandardDeviationThreshold(),
'business_value': StandardDeviationThreshold(),
+'average_precision': StandardDeviationThreshold(),
}
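With the metric added to SUPPORTED_METRIC_VALUES and given a default threshold, it becomes requestable from the estimator. A minimal usage sketch, assuming the public CBPE entry point and hypothetical column names and dataframes:

import nannyml as nml

# Hypothetical columns/dataframes; sketch of requesting the new metric.
estimator = nml.CBPE(
    y_pred_proba='y_pred_proba',
    y_pred='y_pred',
    y_true='y_true',
    timestamp_column_name='timestamp',
    problem_type='classification_binary',
    metrics=['average_precision', 'roc_auc'],
    chunk_size=5000,
)
estimator.fit(reference_df)                # labelled reference data
results = estimator.estimate(analysis_df)  # unlabelled analysis data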


159 changes: 159 additions & 0 deletions nannyml/performance_estimation/confidence_based/metrics.py
@@ -25,6 +25,7 @@
precision_score,
recall_score,
roc_auc_score,
+average_precision_score
)
from sklearn.preprocessing import LabelBinarizer, label_binarize

@@ -448,6 +449,164 @@ def estimate_roc_auc(y_pred_proba: pd.Series) -> float:
return metric


@MetricFactory.register('average_precision', ProblemType.CLASSIFICATION_BINARY)
class BinaryClassificationAP(Metric):
"""CBPE binary classification AP Metric Class."""

def __init__(
self,
y_pred_proba: ModelOutputsType,
y_pred: str,
y_true: str,
chunker: Chunker,
threshold: Threshold,
timestamp_column_name: Optional[str] = None,
**kwargs,
):
"""Initialize CBPE binary classification AP Metric Class."""
super().__init__(
name='average_precision',
y_pred_proba=y_pred_proba,
y_pred=y_pred,
y_true=y_true,
timestamp_column_name=timestamp_column_name,
chunker=chunker,
threshold=threshold,
components=[('Average Precision', 'average_precision')],
lower_threshold_value_limit=0,
upper_threshold_value_limit=1,
)

# sampling error
self._sampling_error_components: Tuple = ()

def _common_cleaning( # type: ignore
self, data: pd.DataFrame, selected_columns: List[str]
) -> Tuple[List[pd.Series], bool]:
"""Remove NaN values from rows of selected columns.

Parameters
----------
data: pd.DataFrame
Pandas dataframe containing data.
selected_columns: List[str]
List containing the strings of column names

Returns
-------
col_list:
List containing the cleaned columns specified. Order of columns from selected_columns is
preserved.
empty:
Boolean that is True when no rows remain after dropping NaN values.
"""
# Raise when requested columns (e.g. the target) are missing; callers may catch this and return NaN.
if not set(selected_columns) <= set(data.columns):
raise InvalidArgumentsException(
f"Selected columns: {selected_columns} not all present in provided data columns {list(data.columns)}"
)
df = data[selected_columns].dropna(axis=0, how='any', inplace=False).reset_index()
empty: bool = False
if df.shape[0] == 0:
empty = True
results = []
for el in selected_columns:
results.append(df[el])
return (results, empty)

def _fit(self, reference_data: pd.DataFrame):
"""Metric _fit implementation on reference data."""
# if requested columns are missing we want to raise an error.
_dat, _empty = self._common_cleaning(
data=reference_data,
selected_columns=[self.y_true, self.y_pred_proba] # type: ignore
)
y_true, y_pred_proba = _dat

# if empty then positive class won't be part of y_true series
if 1 not in y_true.unique():
self._logger.debug(f"Not enough data to compute fit {self.display_name}.")
warnings.warn(f"Not enough data to compute fit {self.display_name}.")
self._sampling_error_components = np.NaN, 0
else:
self._sampling_error_components = bse.ap_sampling_error_components(
y_true_reference=y_true,
y_pred_proba_reference=y_pred_proba,
)

def _estimate(self, data: pd.DataFrame):
calibrated_y_pred_proba = data[self.y_pred_proba].to_numpy()
uncalibrated_y_pred_proba = data[self.uncalibrated_y_pred_proba].to_numpy()

return estimate_ap(calibrated_y_pred_proba, uncalibrated_y_pred_proba)

def _realized_performance(self, data: pd.DataFrame) -> float:
try:
_dat, _ = self._common_cleaning(
data=data,
selected_columns=[self.uncalibrated_y_pred_proba, self.y_true]
)
except InvalidArgumentsException as ex:
if "not all present in provided data columns" in str(ex):
self._logger.debug(str(ex))
return np.NaN
else:
raise ex
uncalibrated_y_pred_proba, y_true = _dat

# if empty then positive class won't be part of y_true series
if 1 not in y_true.unique():
warnings.warn(
f"'{self.y_true}' does not contain positive class for chunk, cannot calculate {self.display_name}. "
f"Returning NaN."
)
return np.NaN
else:
return average_precision_score(y_true, uncalibrated_y_pred_proba)

def _sampling_error(self, data: pd.DataFrame) -> float:
return bse.ap_sampling_error(self._sampling_error_components, data)


def estimate_ap(calibrated_y_pred_proba: np.ndarray, uncalibrated_y_pred_proba: np.ndarray) -> float:
"""Estimates the AP metric.

Parameters
----------
calibrated_y_pred_proba : np.ndarray
Calibrated probability estimates of the sample for each class in the model.
uncalibrated_y_pred_proba : np.ndarray
Raw probability estimates of the sample for each class in the model.

Returns
-------
metric: float
Estimated AP score.
"""
descending_order_index = np.argsort(uncalibrated_y_pred_proba)[::-1]
calibrated_y_pred_proba = calibrated_y_pred_proba[descending_order_index]

tps = np.cumsum(calibrated_y_pred_proba)
fps = 1 + np.arange(calibrated_y_pred_proba.shape[0]) - tps
tps = np.round(tps, 5)
fps = np.round(fps, 5)
ps = np.arange(1, tps.shape[0] + 1)

precision = tps / ps
# prepend the (tps, fps) = (0, 0) point after the division to avoid dividing by zero
precision = np.r_[1, precision]
tps = np.r_[0, tps]
recall = tps / tps[-1]
# reverse so (0,1) is last element
# recall is descending from 1 to 0
precision = precision[::-1]
recall = recall[::-1]

# actual AP calculation
# https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/metrics/_ranking.py#L236
# non-unique values are eliminated because their diff is 0!
metric = -np.sum(np.diff(recall) * precision[:-1])
return metric
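The idea behind estimate_ap is that, under calibration, each calibrated probability is the expected contribution of its row to the true-positive count at that rank, so the cumulative sums give expected tps/fps curves and AP follows from the same summation as the scikit-learn reference, AP = sum_n (R_n - R_{n-1}) * P_n. A self-check sketch on synthetic data, assuming estimate_ap from this module is in scope:

import numpy as np
from sklearn.metrics import average_precision_score

# Synthetic self-check: with perfectly calibrated scores, the label-free
# estimate should approach the realized AP as the sample grows.
rng = np.random.default_rng(42)
p = rng.uniform(0.0, 1.0, size=100_000)  # calibrated P(y=1 | x)
y_true = rng.binomial(1, p)              # labels drawn from those probabilities

estimated = estimate_ap(p, p)            # calibrated == uncalibrated here
realized = average_precision_score(y_true, p)
print(f"estimated={estimated:.4f} realized={realized:.4f}")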


@MetricFactory.register('f1', ProblemType.CLASSIFICATION_BINARY)
class BinaryClassificationF1(Metric):
"""CBPE binary classification f1 Metric Class."""