Skip to content

Commit

Permalink
fixed plotting issues and issues in examples
Browse files: browse the repository at this point in the history
  • Loading branch information
jnesfield committed Jul 11, 2024
1 parent 14ce704 commit 614b295
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 22 deletions.
3 changes: 1 addition & 2 deletions nannyml/data_quality/missing/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ def __init__(
... timestamp_column_name='timestamp',
... ).fit(reference_df)
>>> res = calc.calculate(analysis_df)
>>> for column_name in res.column_names:
... _ = res.filter(period='analysis', column_name=column_name).plot().show()
>>> res.filter(period='analysis').plot().show()
"""
super(MissingValuesCalculator, self).__init__(
chunk_size, chunk_number, chunk_period, chunker, timestamp_column_name
Expand Down
3 changes: 1 addition & 2 deletions nannyml/data_quality/missing/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@ def plot(
... timestamp_column_name='timestamp',
... ).fit(reference)
>>> res = calc.calculate(analysis)
>>> for column_name in res.column_names:
... _ = res.filter(period='analysis', column_name=column_name).plot().show()
>>> res.filter(period='analysis').plot().show()
"""
return plot_metrics(
Expand Down
3 changes: 1 addition & 2 deletions nannyml/data_quality/range/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ def __init__(
... timestamp_column_name='timestamp',
... ).fit(reference_df)
>>> res = calc.calculate(analysis_df)
>>> for column_name in res.column_names:
... _ = res.filter(period='analysis', column_name=column_name).plot().show()
>>> res.filter(period='analysis').plot().show()
"""
super(NumericalRangeCalculator, self).__init__(
chunk_size, chunk_number, chunk_period, chunker, timestamp_column_name
Expand Down
26 changes: 14 additions & 12 deletions nannyml/data_quality/range/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
from nannyml._typing import Key
from nannyml.base import PerColumnResult
from nannyml.chunk import Chunker

# from nannyml.exceptions import InvalidArgumentsException
from nannyml.plots.blueprints.comparisons import ResultCompareMixin
from nannyml.plots.blueprints.metrics import plot_metrics
from nannyml.plots.components import Hover
from nannyml.usage_logging import UsageEvent, log_usage


class Result(PerColumnResult, ResultCompareMixin):
"""Values Out Of Range Result Class.
Contains calculation results and provides plotting functionality.
"""
"""Contains the results of the univariate statistical drift calculation and provides plotting functionality."""

def __init__(
self,
Expand All @@ -37,14 +37,13 @@ def __init__(
timestamp_column_name: Optional[str],
chunker: Chunker,
):
"""Values Out Of Range Result Class."""
super().__init__(results_data, column_names)

self.timestamp_column_name = timestamp_column_name
self.data_quality_metric = data_quality_metric
self.chunker = chunker

def keys(self) -> List[Key]: # noqa: D102
def keys(self) -> List[Key]:
return [
Key(
properties=(column_name,),
Expand All @@ -53,13 +52,16 @@ def keys(self) -> List[Key]: # noqa: D102
for column_name in self.column_names
]

@log_usage(UsageEvent.DQ_CALC_VALUES_OUT_OF_RANGE_PLOT)
@log_usage(UsageEvent.DQ_CALC_UNSEEN_VALUES_PLOT)
def plot(
self,
*args,
**kwargs,
) -> go.Figure:
"""Values Out Of Range results.
"""
Parameters
----------
Returns
-------
Expand All @@ -73,16 +75,16 @@ def plot(
--------
>>> import nannyml as nml
>>> reference, analysis, _ = nml.load_synthetic_car_price_dataset()
>>> column_names = [col for col in reference.columns if col not in ['fuel','transmission','timestamp', 'y_pred', 'y_true']]
>>> calc = nml.NumericalRangeCalculator(
>>> column_names = [col for col in reference.columns if col not in ['car_age', 'km_driven', 'price_new', 'accident_count', 'door_count','timestamp', 'y_pred', 'y_true']]
>>> calc = nml.UnseenValuesCalculator(
... column_names=column_names,
... timestamp_column_name='timestamp',
... ).fit(reference)
>>> res = calc.calculate(analysis)
>>> for column_name in res.column_names:
... _ = res.filter(period='analysis', column_name=column_name).plot().show()
>>> res.filter(period='analysis').plot().show()
"""

return plot_metrics(
self,
title='Data Quality ',
Expand Down
3 changes: 1 addition & 2 deletions nannyml/data_quality/unseen/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ def __init__(
... timestamp_column_name='timestamp',
... ).fit(reference)
>>> res = calc.calculate(analysis)
>>> for column_name in res.column_names:
... _ = res.filter(period='analysis', column_name=column_name).plot().show()
>>> res.filter(period='analysis').plot().show()
"""
super(UnseenValuesCalculator, self).__init__(
chunk_size, chunk_number, chunk_period, chunker, timestamp_column_name
Expand Down
3 changes: 1 addition & 2 deletions nannyml/data_quality/unseen/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ def plot(
... timestamp_column_name='timestamp',
... ).fit(reference)
>>> res = calc.calculate(analysis)
>>> for column_name in res.column_names:
... _ = res.filter(period='analysis', column_name=column_name).plot().show()
>>> res.filter(period='analysis').plot().show()
"""

Expand Down

0 comments on commit 614b295

Please sign in to comment.