Skip to content

Commit

Permalink
pr feedback
Browse files: browse the repository at this point in the history
  • Loading branch information
nikml committed Feb 16, 2024
1 parent 4871a01 commit c239ea1
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 9 deletions.
6 changes: 3 additions & 3 deletions nannyml/sampling_error/summary_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import numpy as np
import pandas as pd
from scipy.stats import gaussian_kde, moment
from logging import getLogger

logger = getLogger(__name__)

def summary_stats_std_sampling_error_components(col: pd.Series) -> Tuple:
"""
Expand All @@ -28,7 +30,7 @@ def summary_stats_std_sampling_error_components(col: pd.Series) -> Tuple:
return (std, moment_4th)


def summary_stats_std_sampling_error(sampling_error_components, col, logger) -> float:
def summary_stats_std_sampling_error(sampling_error_components, col) -> float:
"""
Calculate sampling error for Summary Stats Standard Deviation
using reference data.
Expand All @@ -42,8 +44,6 @@ def summary_stats_std_sampling_error(sampling_error_components, col, logger) ->
a set of parameters that were derived from reference data.
col:
the (analysis) column you want to calculate sampling error for.
logger:
logger to log calculation issues
Returns
-------
Expand Down
7 changes: 2 additions & 5 deletions nannyml/stats/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,12 @@

"""Module containing base classes for data quality calculations."""

from numpy import isfinite, issubdtype, number
from numpy import isnan


def _add_alert_flag(row_result: dict) -> bool:
flag = False
# issubdtype checks that the value has a numeric type.
# np.nan and np.inf pass that check,
# so isfinite is also applied to make the check fail for them.
if not ( issubdtype(type(row_result['value']), number) and isfinite(row_result['value'])):
if isnan(row_result['value']):
flag = True
if row_result['upper_threshold'] is not None:
if row_result['value'] > row_result['upper_threshold']:
Expand Down
2 changes: 1 addition & 1 deletion nannyml/stats/std/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[st
value = _calculate_std_value_stats(data[column_name])
result['value'] = value
result['sampling_error'] = summary_stats_std_sampling_error(
self._sampling_error_components[column_name], data[column_name], self._logger
self._sampling_error_components[column_name], data[column_name]
)
result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error']
result['lower_confidence_boundary'] = np.maximum(
Expand Down

0 comments on commit c239ea1

Please sign in to comment.