From 614b29597e60edf45ac1198d65dc79fe68fccd23 Mon Sep 17 00:00:00 2001 From: jnesfield Date: Thu, 11 Jul 2024 15:13:52 -0700 Subject: [PATCH] fixed plotting issues and issues in examples --- nannyml/data_quality/missing/calculator.py | 3 +-- nannyml/data_quality/missing/result.py | 3 +-- nannyml/data_quality/range/calculator.py | 3 +-- nannyml/data_quality/range/result.py | 26 ++++++++++++---------- nannyml/data_quality/unseen/calculator.py | 3 +-- nannyml/data_quality/unseen/result.py | 3 +-- 6 files changed, 19 insertions(+), 22 deletions(-) diff --git a/nannyml/data_quality/missing/calculator.py b/nannyml/data_quality/missing/calculator.py index 3a280c54..8ab3c8d5 100644 --- a/nannyml/data_quality/missing/calculator.py +++ b/nannyml/data_quality/missing/calculator.py @@ -76,8 +76,7 @@ def __init__( ... timestamp_column_name='timestamp', ... ).fit(reference_df) >>> res = calc.calculate(analysis_df) - >>> for column_name in res.column_names: - ... _ = res.filter(period='analysis', column_name=column_name).plot().show() + >>> res.filter(period='analysis').plot().show() """ super(MissingValuesCalculator, self).__init__( chunk_size, chunk_number, chunk_period, chunker, timestamp_column_name diff --git a/nannyml/data_quality/missing/result.py b/nannyml/data_quality/missing/result.py index 866ab916..d40b32e9 100644 --- a/nannyml/data_quality/missing/result.py +++ b/nannyml/data_quality/missing/result.py @@ -79,8 +79,7 @@ def plot( ... timestamp_column_name='timestamp', ... ).fit(reference) >>> res = calc.calculate(analysis) - >>> for column_name in res.column_names: - ... _ = res.filter(period='analysis', column_name=column_name).plot().show() + >>> res.filter(period='analysis').plot().show() """ return plot_metrics( diff --git a/nannyml/data_quality/range/calculator.py b/nannyml/data_quality/range/calculator.py index fa33de53..8b14f21d 100644 --- a/nannyml/data_quality/range/calculator.py +++ b/nannyml/data_quality/range/calculator.py @@ -74,8 +74,7 @@ def __init__( ... timestamp_column_name='timestamp', ... ).fit(reference_df) >>> res = calc.calculate(analysis_df) - >>> for column_name in res.column_names: - ... _ = res.filter(period='analysis', column_name=column_name).plot().show() + >>> res.filter(period='analysis').plot().show() """ super(NumericalRangeCalculator, self).__init__( chunk_size, chunk_number, chunk_period, chunker, timestamp_column_name diff --git a/nannyml/data_quality/range/result.py b/nannyml/data_quality/range/result.py index 943e69a6..28c02df8 100644 --- a/nannyml/data_quality/range/result.py +++ b/nannyml/data_quality/range/result.py @@ -18,16 +18,16 @@ from nannyml._typing import Key from nannyml.base import PerColumnResult from nannyml.chunk import Chunker + +# from nannyml.exceptions import InvalidArgumentsException from nannyml.plots.blueprints.comparisons import ResultCompareMixin from nannyml.plots.blueprints.metrics import plot_metrics +from nannyml.plots.components import Hover from nannyml.usage_logging import UsageEvent, log_usage class Result(PerColumnResult, ResultCompareMixin): - """Values Out Of Range Result Class. - - Contains calculation results and provides plotting functionality. - """ + """Contains the results of the univariate statistical drift calculation and provides plotting functionality.""" def __init__( self, @@ -37,14 +37,13 @@ def __init__( timestamp_column_name: Optional[str], chunker: Chunker, ): - """Values Out Of Range Result Class.""" super().__init__(results_data, column_names) self.timestamp_column_name = timestamp_column_name self.data_quality_metric = data_quality_metric self.chunker = chunker - def keys(self) -> List[Key]: # noqa: D102 + def keys(self) -> List[Key]: return [ Key( properties=(column_name,), @@ -53,13 +52,16 @@ def keys(self) -> List[Key]: # noqa: D102 for column_name in self.column_names ] - @log_usage(UsageEvent.DQ_CALC_VALUES_OUT_OF_RANGE_PLOT) + @log_usage(UsageEvent.DQ_CALC_UNSEEN_VALUES_PLOT) def plot( self, *args, **kwargs, ) -> go.Figure: - """Values Out Of Range results. + """ + + Parameters + ---------- Returns ------- @@ -73,16 +75,16 @@ def plot( -------- >>> import nannyml as nml >>> reference, analysis, _ = nml.load_synthetic_car_price_dataset() - >>> column_names = [col for col in reference.columns if col not in ['fuel','transmission','timestamp', 'y_pred', 'y_true']] - >>> calc = nml.NumericalRangeCalculator( + >>> column_names = [col for col in reference.columns if col not in ['car_age', 'km_driven', 'price_new', 'accident_count', 'door_count','timestamp', 'y_pred', 'y_true']] + >>> calc = nml.UnseenValuesCalculator( ... column_names=column_names, ... timestamp_column_name='timestamp', ... ).fit(reference) >>> res = calc.calculate(analysis) - >>> for column_name in res.column_names: - ... _ = res.filter(period='analysis', column_name=column_name).plot().show() + >>> res.filter(period='analysis').plot().show() """ + return plot_metrics( self, title='Data Quality ', diff --git a/nannyml/data_quality/unseen/calculator.py b/nannyml/data_quality/unseen/calculator.py index 9859fc0d..15605aef 100644 --- a/nannyml/data_quality/unseen/calculator.py +++ b/nannyml/data_quality/unseen/calculator.py @@ -75,8 +75,7 @@ def __init__( ... timestamp_column_name='timestamp', ... ).fit(reference) >>> res = calc.calculate(analysis) - >>> for column_name in res.column_names: - ... _ = res.filter(period='analysis', column_name=column_name).plot().show() + >>> res.filter(period='analysis').plot().show() """ super(UnseenValuesCalculator, self).__init__( chunk_size, chunk_number, chunk_period, chunker, timestamp_column_name diff --git a/nannyml/data_quality/unseen/result.py b/nannyml/data_quality/unseen/result.py index f57bc852..28c02df8 100644 --- a/nannyml/data_quality/unseen/result.py +++ b/nannyml/data_quality/unseen/result.py @@ -81,8 +81,7 @@ def plot( ... timestamp_column_name='timestamp', ... ).fit(reference) >>> res = calc.calculate(analysis) - >>> for column_name in res.column_names: - ... _ = res.filter(period='analysis', column_name=column_name).plot().show() + >>> res.filter(period='analysis').plot().show() """