From 0d0fd8dadd12e35ffae72a4df44028314ed9119b Mon Sep 17 00:00:00 2001 From: Niels Nuyttens Date: Mon, 15 Jan 2024 10:25:07 +0100 Subject: [PATCH] Fix Flake8 & mypy --- nannyml/distribution/__init__.py | 2 +- .../distribution/categorical/calculator.py | 4 +-- nannyml/distribution/categorical/result.py | 30 +++++++++---------- nannyml/distribution/continuous/calculator.py | 4 +-- nannyml/distribution/continuous/result.py | 27 +++++++---------- 5 files changed, 30 insertions(+), 37 deletions(-) diff --git a/nannyml/distribution/__init__.py b/nannyml/distribution/__init__.py index 2da9fc4d..8d329020 100644 --- a/nannyml/distribution/__init__.py +++ b/nannyml/distribution/__init__.py @@ -1,2 +1,2 @@ -from .continuous import ContinuousDistributionCalculator from .categorical import CategoricalDistributionCalculator +from .continuous import ContinuousDistributionCalculator diff --git a/nannyml/distribution/categorical/calculator.py b/nannyml/distribution/categorical/calculator.py index f7bc9233..6725ee84 100644 --- a/nannyml/distribution/categorical/calculator.py +++ b/nannyml/distribution/categorical/calculator.py @@ -1,4 +1,4 @@ -from typing import Union, List, Optional +from typing import List, Optional, Union import numpy as np import pandas as pd @@ -28,7 +28,7 @@ def __init__( timestamp_column_name, ) - self.column_names = column_names + self.column_names = column_names if isinstance(column_names, List) else [column_names] self.result: Optional[Result] = None self._was_fitted: bool = False diff --git a/nannyml/distribution/categorical/result.py b/nannyml/distribution/categorical/result.py index 951c7898..1f74aa3e 100644 --- a/nannyml/distribution/categorical/result.py +++ b/nannyml/distribution/categorical/result.py @@ -1,19 +1,20 @@ import copy import math -from typing import List, Optional, Union, Dict, Any, overload -from typing_extensions import Self +from typing import Any, Dict, List, Optional, Union import numpy as np import pandas as pd import plotly.graph_objs as go +from typing_extensions import Self from nannyml import Chunker from nannyml._typing import Key from nannyml.base import AbstractResult from nannyml.drift.univariate.result import Result as DriftResult from nannyml.exceptions import InvalidArgumentsException -from nannyml.plots import is_time_based_x_axis, Colors, Figure, Hover -from nannyml.plots.components.stacked_bar_plot import stacked_bar, alert as stacked_bar_alert +from nannyml.plots import Colors, Figure, is_time_based_x_axis +from nannyml.plots.components.stacked_bar_plot import alert as stacked_bar_alert +from nannyml.plots.components.stacked_bar_plot import stacked_bar class Result(AbstractResult): @@ -170,7 +171,7 @@ def plot(self, drift_result: Optional[DriftResult] = None, *args, **kwargs) -> g else _plot_categorical_distribution(self) ) - def check_is_compatible_with(self, drift_result: DriftResult) -> bool: + def check_is_compatible_with(self, drift_result: DriftResult): # Check if all distribution columns are present in the drift result drift_column_names = set([col for tup in drift_result.keys() for col, _ in tup]) distribution_column_names = set(self.column_names) @@ -244,10 +245,6 @@ def _plot_categorical_distribution( reference_result = result.filter(period='reference', column_names=[column_name]) analysis_result = result.filter(period='analysis', column_names=[column_name]) - analysis_chunk_start_dates = analysis_result.chunk_start_dates - analysis_chunk_end_dates = analysis_result.chunk_end_dates - x_axis_is_time_based = is_time_based_x_axis(analysis_chunk_start_dates, analysis_chunk_end_dates) - figure = _plot_stacked_bar( figure=figure, row=row, @@ -265,8 +262,8 @@ def _plot_categorical_distribution( analysis_chunk_keys=analysis_result.chunk_keys, analysis_chunk_periods=analysis_result.chunk_periods, analysis_chunk_indices=analysis_result.chunk_indices, - analysis_chunk_start_dates=analysis_chunk_start_dates, - analysis_chunk_end_dates=analysis_chunk_end_dates, + analysis_chunk_start_dates=analysis_result.chunk_start_dates, + analysis_chunk_end_dates=analysis_result.chunk_end_dates, ) return figure @@ -274,7 +271,7 @@ def _plot_categorical_distribution( def _plot_categorical_distribution_with_alerts( result: Result, - drift_result: Optional[DriftResult] = None, + drift_result: DriftResult, title: Optional[str] = 'Column distributions', figure: Optional[go.Figure] = None, x_axis_time_title: str = 'Time', @@ -407,9 +404,12 @@ def _plot_stacked_bar( ) assert reference_chunk_indices is not None - analysis_chunk_indices = analysis_chunk_indices + (max(reference_chunk_indices) + 1) + analysis_chunk_indices = (analysis_chunk_indices + (max(reference_chunk_indices) + 1)).reset_index(drop=True) analysis_value_counts['chunk_indices'] += max(reference_chunk_indices) + 1 + if analysis_chunk_start_dates is not None: + analysis_chunk_start_dates = analysis_chunk_start_dates.reset_index(drop=True) + figure = stacked_bar( figure=figure, stacked_bar_table=analysis_value_counts, @@ -429,8 +429,8 @@ def _plot_stacked_bar( alerts=analysis_alerts, stacked_bar_table=analysis_value_counts, color=Colors.RED_IMPERIAL, - chunk_indices=analysis_chunk_indices.reset_index(drop=True), - chunk_start_dates=analysis_chunk_start_dates.reset_index(drop=True), + chunk_indices=analysis_chunk_indices, + chunk_start_dates=analysis_chunk_start_dates, chunk_end_dates=analysis_chunk_end_dates, showlegend=True, legendgroup=column_name, diff --git a/nannyml/distribution/continuous/calculator.py b/nannyml/distribution/continuous/calculator.py index fa5d6528..deee1517 100644 --- a/nannyml/distribution/continuous/calculator.py +++ b/nannyml/distribution/continuous/calculator.py @@ -1,5 +1,5 @@ from functools import partial -from typing import Union, List, Optional +from typing import List, Optional, Union import numpy as np import pandas as pd @@ -31,7 +31,7 @@ def __init__( timestamp_column_name, ) - self.column_names = column_names + self.column_names = column_names if isinstance(column_names, List) else [column_names] self.result: Optional[Result] = None def _fit(self, reference_data: pd.DataFrame, *args, **kwargs) -> Self: diff --git a/nannyml/distribution/continuous/result.py b/nannyml/distribution/continuous/result.py index 7f5c520d..42788bdd 100644 --- a/nannyml/distribution/continuous/result.py +++ b/nannyml/distribution/continuous/result.py @@ -1,5 +1,5 @@ import math -from typing import List, Optional, Dict, Any, Union +from typing import Any, Dict, List, Optional, Union import numpy as np import pandas as pd @@ -10,8 +10,9 @@ from nannyml.base import PerColumnResult from nannyml.drift.univariate.result import Result as DriftResult from nannyml.exceptions import InvalidArgumentsException -from nannyml.plots import is_time_based_x_axis, Hover, Colors, Figure -from nannyml.plots.components.joy_plot import joy, alert as joy_alert +from nannyml.plots import Colors, Figure, Hover, is_time_based_x_axis +from nannyml.plots.components.joy_plot import alert as joy_alert +from nannyml.plots.components.joy_plot import joy class Result(PerColumnResult): @@ -58,7 +59,7 @@ def plot(self, drift_result: Optional[DriftResult] = None, *args, **kwargs) -> g else _plot_continuous_distribution(self) ) - def check_is_compatible_with(self, drift_result: DriftResult) -> bool: + def check_is_compatible_with(self, drift_result: DriftResult): # Check if all distribution columns are present in the drift result drift_column_names = set([col for tup in drift_result.keys() for col, _ in tup]) distribution_column_names = set(self.column_names) @@ -132,10 +133,6 @@ def _plot_continuous_distribution( (column_name,) = key.properties - analysis_chunk_start_dates = analysis_result.chunk_start_dates - analysis_chunk_end_dates = analysis_result.chunk_end_dates - x_axis_is_time_based = is_time_based_x_axis(analysis_chunk_start_dates, analysis_chunk_end_dates) - figure = _plot_joyplot( figure=figure, row=row, @@ -153,8 +150,8 @@ def _plot_continuous_distribution( analysis_chunk_keys=analysis_result.chunk_keys, analysis_chunk_periods=analysis_result.chunk_periods, analysis_chunk_indices=analysis_result.chunk_indices, - analysis_chunk_start_dates=analysis_chunk_start_dates, - analysis_chunk_end_dates=analysis_chunk_end_dates, + analysis_chunk_start_dates=analysis_result.chunk_start_dates, + analysis_chunk_end_dates=analysis_result.chunk_end_dates, ) return figure @@ -162,7 +159,7 @@ def _plot_continuous_distribution( def _plot_continuous_distribution_with_alerts( result: Result, - drift_result: Optional[DriftResult] = None, + drift_result: DriftResult, title: Optional[str] = 'Column distributions', figure: Optional[go.Figure] = None, x_axis_time_title: str = 'Time', @@ -212,10 +209,6 @@ def _plot_continuous_distribution_with_alerts( (column_name, method_name) = drift_key.properties - analysis_chunk_start_dates = analysis_result.chunk_start_dates - analysis_chunk_end_dates = analysis_result.chunk_end_dates - x_axis_is_time_based = is_time_based_x_axis(analysis_chunk_start_dates, analysis_chunk_end_dates) - # reference_alerts = drift_result.filter(period='reference').alerts(drift_key) analysis_alerts = drift_result.filter(period='analysis').alerts(drift_key) @@ -236,8 +229,8 @@ def _plot_continuous_distribution_with_alerts( analysis_chunk_keys=analysis_result.chunk_keys, analysis_chunk_periods=analysis_result.chunk_periods, analysis_chunk_indices=analysis_result.chunk_indices, - analysis_chunk_start_dates=analysis_chunk_start_dates, - analysis_chunk_end_dates=analysis_chunk_end_dates, + analysis_chunk_start_dates=analysis_result.chunk_start_dates, + analysis_chunk_end_dates=analysis_result.chunk_end_dates, ) return figure