From 4d8285e13cbd999d574d27d0315b08aebd5e7a15 Mon Sep 17 00:00:00 2001 From: Saksham Gupta Date: Wed, 7 Aug 2024 13:39:41 +0530 Subject: [PATCH] Update docstring as per numpy style format --- ioos_qc/argo.py | 40 ++-- ioos_qc/axds.py | 29 +-- ioos_qc/config.py | 56 +++-- ioos_qc/config_creator/config_creator.py | 33 +-- ioos_qc/qartod.py | 253 ++++++++++++++--------- ioos_qc/utils.py | 11 +- 6 files changed, 250 insertions(+), 172 deletions(-) diff --git a/ioos_qc/argo.py b/ioos_qc/argo.py index 7ab189b..0019912 100644 --- a/ioos_qc/argo.py +++ b/ioos_qc/argo.py @@ -24,12 +24,14 @@ def pressure_increasing_test(inp): Ref: ARGO QC Manual: 8. Pressure increasing test - Args - ---- - inp: Pressure values as a numeric numpy array or a list of numbers. + Parameters + ---------- + inp + Pressure values as a numeric numpy array or a list of numbers. Returns ------- + flag_arr A masked array of flag values equal in size to that of the input. """ @@ -68,25 +70,31 @@ def speed_test( Missing and masked data is flagged as UNKNOWN. If this test fails, it typically means that either a position or time is bad data, - or that a platform is mislabeled. + or that a platform is mislabeled. Ref: ARGO QC Manual: 5. Impossible speed test - Args - ---- - lon: Longitudes as a numeric numpy array or a list of numbers. - lat: Latitudes as a numeric numpy array or a list of numbers. - tinp: Time data as a sequence of datetime objects compatible with pandas DatetimeIndex. - This includes numpy datetime64, python datetime objects and pandas Timestamp object. - ie. pd.DatetimeIndex([datetime.utcnow(), np.datetime64(), pd.Timestamp.now()] - If anything else is passed in the format is assumed to be seconds since the unix epoch. - suspect_threshold: A float value representing a speed, in meters per second. - Speeds exceeding this will be flagged as SUSPECT. - fail_threshold: A float value representing a speed, in meters per second. - Speeds exceeding this will be flagged as FAIL. + Parameters + ---------- + lon + Longitudes as a numeric numpy array or a list of numbers. + lat + Latitudes as a numeric numpy array or a list of numbers. + tinp + Time data as a sequence of datetime objects compatible with pandas DatetimeIndex. + This includes numpy datetime64, python datetime objects and pandas Timestamp object. + ie. pd.DatetimeIndex([datetime.utcnow(), np.datetime64(), pd.Timestamp.now()]) + If anything else is passed in the format is assumed to be seconds since the unix epoch. + suspect_threshold + A float value representing a speed, in meters per second. + Speeds exceeding this will be flagged as SUSPECT. + fail_threshold + A float value representing a speed, in meters per second. + Speeds exceeding this will be flagged as FAIL. Returns ------- + flag_arr A masked array of flag values equal in size to that of the input. """ diff --git a/ioos_qc/axds.py b/ioos_qc/axds.py index cb0bc5a..79a8e8c 100644 --- a/ioos_qc/axds.py +++ b/ioos_qc/axds.py @@ -39,22 +39,25 @@ def valid_range_test( treated as inclusive and the second span valid is treated as exclusive. To change this behavior you can use the parameters `start_inclusive` and `end_inclusive`. - Args - ---- - inp (Sequence[any]): Data as a sequence of objects compatible with the fail_span objects - fail_span (Tuple[any, any]): 2-tuple range which to flag outside data as FAIL. Objects - should be of equal format to that of the inp parameter as they will be checked for - equality without type conversion. - dtype (np.dtype): Optional. 
If your data is not already numpy-typed you can specify its - dtype here. - start_inclusive (bool): Optional. If the starting span value should be inclusive (True) or - exclusive (False). - end_inclusive (bool): Optional. If the ending span value should be inclusive (True) or - exclusive (False). + Parameters + ---------- + inp (Sequence[any]) + Data as a sequence of objects compatible with the fail_span objects + fail_span (Tuple[any, any]) + 2-tuple range which to flag outside data as FAIL. Objects + should be of equal format to that of the inp parameter as they will be checked for + equality without type conversion. + dtype (np.dtype) + Optional. If your data is not already numpy-typed you can specify its dtype here. + start_inclusive (bool) + Optional. If the starting span value should be inclusive (True) or exclusive (False). + end_inclusive (bool) + Optional. If the ending span value should be inclusive (True) or exclusive (False). Returns ------- - np.ma.core.MaskedArray: A masked array of flag values equal in size to that of the input. + np.ma.core.MaskedArray + A masked array of flag values equal in size to that of the input. """ # Numpy array inputs diff --git a/ioos_qc/config.py b/ioos_qc/config.py index 1fe6fbc..b4ce2b1 100644 --- a/ioos_qc/config.py +++ b/ioos_qc/config.py @@ -5,7 +5,8 @@ Attributes ---------- - tw (namedtuple): The TimeWindow namedtuple definition +tw (namedtuple) + The TimeWindow namedtuple definition """ @@ -182,9 +183,10 @@ def run(self, **passedkwargs): def extract_calls(source) -> List[Call]: """Extracts call objects from a source object. - Args - ---- - source ([any]): The source of Call objects, this can be a: + Parameters + ---------- + source ([any]): + The source of Call objects, this can be a: * Call object * list of Call objects * list of objects with the 'calls' attribute @@ -193,7 +195,8 @@ def extract_calls(source) -> List[Call]: Returns ------- - List[Call]: List of extracted Call objects + List[Call] + List of extracted Call objects """ if isinstance(source, Call): @@ -234,9 +237,10 @@ def __init__( version=None, default_stream_key="_stream", ) -> None: - """Args - ---- - source: The QC configuration representation in one of the following formats: + """Parameters + ---------- + source: + The QC configuration representation in one of the following formats: python dict or odict JSON/YAML filepath (str or Path object) JSON/YAML str @@ -349,9 +353,10 @@ def add(self, source) -> None: types of objects accepted as the source parameter. The changes the internal .calls attribute and returns None. 
- Args - ---- - source ([any]): The source of Call objects, this can be a: + Parameters + ---------- + source ([any]) + The source of Call objects, this can be a: * Call object * list of Call objects * list of objects with the 'calls' attribute @@ -391,10 +396,14 @@ class ContextConfig: Attributes ---------- - config (odict): dict representation of the parsed ContextConfig source - region (GeometryCollection): A `shapely` object representing the valid geographic region - window (namedtuple): A TimeWindow object representing the valid time period - streams (odict): dict representation of the parsed Config objects + config (odict) + dict representation of the parsed ContextConfig source + region (GeometryCollection) + A `shapely` object representing the valid geographic region + window (namedtuple) + A TimeWindow object representing the valid time period + streams (odict) + dict representation of the parsed Config objects """ @@ -489,9 +498,10 @@ def add(self, source) -> None: types of objects accepted as the source parameter. The changes the internal .calls attribute and returns None. - Args - ---- - source ([any]): The source of Call objects, this can be a: + Parameters + ---------- + source ([any]) + The source of Call objects, this can be a: * Call object * list of Call objects * list of objects with the 'calls' attribute @@ -522,9 +532,10 @@ def __init__(self, source, default_stream_key="_stream") -> None: stream. This just sets up a stream with the name passed in as the "default_stream_key" parameter. - Args - ---- - source: The QC configuration representation in one of the following formats: + Parameters + ---------- + source + The QC configuration representation in one of the following formats: python dict or odict JSON/YAML filepath (str or Path object) JSON/YAML str @@ -532,7 +543,8 @@ def __init__(self, source, default_stream_key="_stream") -> None: netCDF4/xarray filepath netCDF4/xarray Dataset list of Call objects - default_stream_key: The internal name of the stream, defaults to "_stream" + default_stream_key + The internal name of the stream, defaults to "_stream" """ warnings.warn( diff --git a/ioos_qc/config_creator/config_creator.py b/ioos_qc/config_creator/config_creator.py index 28733d6..347c495 100644 --- a/ioos_qc/config_creator/config_creator.py +++ b/ioos_qc/config_creator/config_creator.py @@ -178,12 +178,14 @@ def __str__(self) -> str: class QcVariableConfig(dict): """Used to generate a QcConfig for a specific variable. - Args - ---- - path_or_dict: QcVariableConfig configuration, one of the following formats: + Parameters + ---------- + path_or_dict + QcVariableConfig configuration, one of the following formats: python dict JSON filepath (str or Path object) - schema: JSON schema for QcVariable + schema + JSON schema for QcVariable """ @@ -255,12 +257,15 @@ class QcConfigCreator: Arguments --------- - creator_config (QcCreatorConfig): Configuration for datasets and variables used to create qc_config. + creator_config (QcCreatorConfig) + Configuration for datasets and variables used to create qc_config. Attributes ---------- - allowed_stats (list): Specific statistics allowed to be used to configure each test. - allowed_operators (list): Operators allowed to used to configure each test. + allowed_stats (list) + Specific statistics allowed to be used to configure each test. + allowed_operators (list) + Operators allowed to used to configure each test. 
""" @@ -272,13 +277,15 @@ def __init__(self, creator_config) -> None: def create_config(self, variable_config): """Create QARTOD QC config given QcVariableConfig. - Args - ---- - variable_config (QcVariableConfig): Config for variable to be quality controlled + Parameters + ---------- + variable_config (QcVariableConfig) + Config for variable to be quality controlled Returns ------- - qc_config (dict): Config for ioos_qc + qc_config (dict) + Config for ioos_qc """ stats = self._get_stats(variable_config) @@ -307,8 +314,8 @@ def _determine_dataset_years(self): Notes ----- - - Each dataset is from a unique climatology or source, - so the monthly files have different years. + - Each dataset is from a unique climatology or source, + so the monthly files have different years. """ years = {} diff --git a/ioos_qc/qartod.py b/ioos_qc/qartod.py index 53f45fa..c282c43 100644 --- a/ioos_qc/qartod.py +++ b/ioos_qc/qartod.py @@ -63,12 +63,14 @@ def qartod_compare( ) -> np.ma.MaskedArray: """Aggregates an array of flags by precedence into a single array. - Args - ---- - vectors: An array of uniform length arrays representing individual flags + Parameters + ---------- + vectors + An array of uniform length arrays representing individual flags Returns ------- + flag_arr A masked array of aggregated flag data. """ @@ -114,15 +116,20 @@ def location_test( also use a unit from the quantities library. Missing and masked data is flagged as UNKNOWN. - Args - ---- - lon: Longitudes as a numeric numpy array or a list of numbers. - lat: Latitudes as a numeric numpy array or a list of numbers. - bbox: A length 4 tuple expressed in (minx, miny, maxx, maxy) [optional]. - range_max: Maximum allowed range expressed in geodesic curve distance (meters). + Parameters + ---------- + lon + Longitudes as a numeric numpy array or a list of numbers. + lat + Latitudes as a numeric numpy array or a list of numbers. + bbox + A length 4 tuple expressed in (minx, miny, maxx, maxy) [optional]. + range_max + Maximum allowed range expressed in geodesic curve distance (meters). Returns ------- + flag_arr A masked array of flag values equal in size to that of the input. """ @@ -192,14 +199,18 @@ def gross_range_test( range as FAIL data. Optionally also flag data which falls outside of a user defined range as SUSPECT. Missing and masked data is flagged as UNKNOWN. - Args - ---- - inp: Input data as a numeric numpy array or a list of numbers. - fail_span: 2-tuple range which to flag outside data as FAIL. - suspect_span: 2-tuple range which to flag outside data as SUSPECT. [optional] + Parameters + ---------- + inp + Input data as a numeric numpy array or a list of numbers. + fail_span + 2-tuple range which to flag outside data as FAIL. + suspect_span + 2-tuple range which to flag outside data as SUSPECT. [optional] Returns ------- + flag_arr A masked array of flag values equal in size to that of the input. """ @@ -241,25 +252,30 @@ def gross_range_test( class ClimatologyConfig: """Objects to hold the config for a Climatology test. - Args - ---- - tspan: 2-tuple range. - If period is defined, then this is a numeric range. - If period is not defined, then its a date range. - fspan: (optional) 2-tuple range of valid values. This is passed in as the fail_span to the gross_range_test. - vspan: 2-tuple range of valid values. This is passed in as the suspect_span to the gross_range test. - zspan: (optional) Vertical (depth) range, in meters positive down - period: (optional) The unit the tspan argument is in. 
Defaults to datetime object - but can also be any attribute supported by a pandas Timestamp object. - - See: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html - - Options: - * year - * week / weekofyear - * dayofyear - * dayofweek - * quarter + Parameters + ---------- + tspan + 2-tuple range. + If period is defined, then this is a numeric range. + If period is not defined, then its a date range. + fspan + (optional) 2-tuple range of valid values. This is passed in as the fail_span to the gross_range_test. + vspan + 2-tuple range of valid values. This is passed in as the suspect_span to the gross_range test. + zspan + (optional) Vertical (depth) range, in meters positive down + period + (optional) The unit the tspan argument is in. Defaults to datetime object + but can also be any attribute supported by a pandas Timestamp object. + + See: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.html + + Options: + * year + * week / weekofyear + * dayofyear + * dayofweek + * quarter """ @@ -283,9 +299,10 @@ def members(self): return self._members def values(self, tind: pd.Timestamp, zind=None): - """Args - ---- - tind: Value to test for inclusion between time bounds + """Parameters + ---------- + tind + Value to test for inclusion between time bounds """ span = (None, None) @@ -469,20 +486,25 @@ def climatology_test( Data for which no ClimatologyConfig member exists is marked as UNKNOWN. - Args - ---- - config: A ClimatologyConfig object or a list of dicts containing tuples - that can be used to create a ClimatologyConfig object. See ClimatologyConfig - docs for more info. - inp: Input data as a numeric numpy array or a list of numbers. - tinp: Time data as a sequence of datetime objects compatible with pandas DatetimeIndex. - This includes numpy datetime64, python datetime objects and pandas Timestamp object. - ie. pd.DatetimeIndex([datetime.utcnow(), np.datetime64(), pd.Timestamp.now()] - If anything else is passed in the format is assumed to be seconds since the unix epoch. - zinp: Z (depth) data, in meters positive down, as a numeric numpy array or a list of numbers. + Parameters + ---------- + config + A ClimatologyConfig object or a list of dicts containing tuples + that can be used to create a ClimatologyConfig object. See ClimatologyConfig + docs for more info. + inp + Input data as a numeric numpy array or a list of numbers. + tinp + Time data as a sequence of datetime objects compatible with pandas DatetimeIndex. + This includes numpy datetime64, python datetime objects and pandas Timestamp object. + ie. pd.DatetimeIndex([datetime.utcnow(), np.datetime64(), pd.Timestamp.now()]) + If anything else is passed in the format is assumed to be seconds since the unix epoch. + zinp + Z (depth) data, in meters positive down, as a numeric numpy array or a list of numbers. Returns ------- + flag_arr A masked array of flag values equal in size to that of the input. """ @@ -529,12 +551,16 @@ def spike_test( and values which exceed the high threshold are flagged FAIL. Missing and masked data is flagged as UNKNOWN. - Args - ---- - inp: Input data as a numeric numpy array or a list of numbers. - suspect_threshold: The SUSPECT threshold value, in observations units. - fail_threshold: The SUSPECT threshold value, in observations units. - method: ['average'(default),'differential'] optional input to assign the method used to detect spikes. + Parameters + ---------- + inp + Input data as a numeric numpy array or a list of numbers. 
+    suspect_threshold
+        The SUSPECT threshold value, in observations units.
+    fail_threshold
+        The FAIL threshold value, in observations units.
+    method
+        ['average'(default),'differential'] optional input to assign the method used to detect spikes.
             * "average": Determine if there is a spike at data point n-1 by subtracting the midpoint of n and n-2 and taking the absolute value of this quantity, and checking if it exceeds a low or high threshold.
             * "differential": Determine if there is a spike at data point n by calculating the difference between n and n-1 and n+1 and n variation. To considered, (n - n-1)*(n+1 - n) should be smaller than zero (in opposite direction).
 
     Returns
     -------
+    flag_arr
         A masked array of flag values equal in size to that of the input.
 
     """
@@ -619,18 +646,21 @@ def rate_of_change_test(
     exceed. Threshold is expressed as a rate in observations units per second.
     Missing and masked data is flagged as UNKNOWN.
 
-    Args
-    ----
-    inp: Input data as a numeric numpy array or a list of numbers.
-    tinp: Time data as a sequence of datetime objects compatible with pandas DatetimeIndex.
-        This includes numpy datetime64, python datetime objects and pandas Timestamp object.
-        ie. pd.DatetimeIndex([datetime.utcnow(), np.datetime64(), pd.Timestamp.now()])
-        If anything else is passed in the format is assumed to be seconds since the unix epoch.
-    threshold: A float value representing a rate of change over time,
-        in observation units per second.
+    Parameters
+    ----------
+    inp
+        Input data as a numeric numpy array or a list of numbers.
+    tinp
+        Time data as a sequence of datetime objects compatible with pandas DatetimeIndex.
+        This includes numpy datetime64, python datetime objects and pandas Timestamp object.
+        ie. pd.DatetimeIndex([datetime.utcnow(), np.datetime64(), pd.Timestamp.now()])
+        If anything else is passed in the format is assumed to be seconds since the unix epoch.
+    threshold
+        A float value representing a rate of change over time, in observation units per second.
 
     Returns
     -------
+    flag_arr
         A masked array of flag values equal in size to that of the input.
 
     """
@@ -677,24 +707,28 @@ def flat_line_test(
     Missing and masked data is flagged as UNKNOWN.
     More information: https://github.com/ioos/ioos_qc/pull/11.
 
-    Args
-    ----
-    inp: Input data as a numeric numpy array or a list of numbers.
-    tinp: Time data as a sequence of datetime objects compatible with pandas DatetimeIndex.
-        This includes numpy datetime64, python datetime objects and pandas Timestamp object.
-        ie. pd.DatetimeIndex([datetime.utcnow(), np.datetime64(), pd.Timestamp.now()]
-        If anything else is passed in the format is assumed to be seconds since the unix epoch.
-    suspect_threshold: The number of seconds within `tolerance` to
-        allow before being flagged as SUSPECT.
-    fail_threshold: The number of seconds within `tolerance` to
-        allow before being flagged as FAIL.
-    tolerance: The tolerance that should be exceeded between consecutive values.
-        To determine if the current point `n` should be flagged, we use a rolling window, with endpoint at
-        point `n`, and calculate the range of values in the window. If that range is less than `tolerance`,
-        then the point is flagged.
+    Parameters
+    ----------
+    inp
+        Input data as a numeric numpy array or a list of numbers.
+    tinp
+        Time data as a sequence of datetime objects compatible with pandas DatetimeIndex.
+        This includes numpy datetime64, python datetime objects and pandas Timestamp object.
+        ie. pd.DatetimeIndex([datetime.utcnow(), np.datetime64(), pd.Timestamp.now()])
+        If anything else is passed in the format is assumed to be seconds since the unix epoch.
+ suspect_threshold + The number of seconds within `tolerance` to allow before being flagged as SUSPECT. + fail_threshold + The number of seconds within `tolerance` to allow before being flagged as FAIL. + tolerance + The tolerance that should be exceeded between consecutive values. + To determine if the current point `n` should be flagged, we use a rolling window, with endpoint at + point `n`, and calculate the range of values in the window. If that range is less than `tolerance`, + then the point is flagged. Returns ------- + flag_arr A masked array of flag values equal in size to that of the input. """ @@ -777,27 +811,35 @@ def attenuated_signal_test( Missing and masked data is flagged as UNKNOWN. - Args - ---- - inp: Input data as a numeric numpy array or a list of numbers. - tinp: Time input data as a numpy array of dtype `datetime64`. - suspect_threshold: Any calculated value below this amount will be flagged as SUSPECT. - In observations units. - fail_threshold: Any calculated values below this amount will be flagged as FAIL. - In observations units. - test_period: Length of time to test over in seconds [optional]. - Otherwise, will test against entire `inp`. - min_obs: Minimum number of observations in window required to calculate a result [optional]. - Otherwise, test will start at beginning of time series. - Note: you can specify either `min_obs` or `min_period`, but not both. - min_period: Minimum number of seconds in test_period required to calculate a result [optional]. - Otherwise, test will start at beginning of time series. - Note: you can specify either `min_obs` or `min_period`, but not both. - check_type: Either 'std' (default) or 'range', depending on the type of check - you wish to perform. + Parameters + ---------- + inp + Input data as a numeric numpy array or a list of numbers. + tinp + Time input data as a numpy array of dtype `datetime64`. + suspect_threshold + Any calculated value below this amount will be flagged as SUSPECT. + In observations units. + fail_threshold + Any calculated values below this amount will be flagged as FAIL. + In observations units. + test_period + Length of time to test over in seconds [optional]. + Otherwise, will test against entire `inp`. + min_obs + Minimum number of observations in window required to calculate a result [optional]. + Otherwise, test will start at beginning of time series. + Note: you can specify either `min_obs` or `min_period`, but not both. + min_period + Minimum number of seconds in test_period required to calculate a result [optional]. + Otherwise, test will start at beginning of time series. + Note: you can specify either `min_obs` or `min_period`, but not both. + check_type + Either 'std' (default) or 'range', depending on the type of check you wish to perform. Returns ------- + flag_arr A masked array of flag values equal in size to that of the input. This array will always contain only a single unique value since all input data is flagged together. @@ -881,17 +923,22 @@ def density_inversion_test( Ref: Manual for Real-Time Quality Control of in-situ Temperature and Salinity Data, Version 2.0, January 2016 - Args - ---- - inp: Potential density values as a numeric numpy array or a list of numbers. - zinp: Corresponding depth/pressure values for each density. - suspect_threshold: A float value representing a maximum potential density(or sigma0) - variation to be tolerated, downward density variation exceeding this will be flagged as SUSPECT. 
-    fail_threshold: A float value representing a maximum potential density(or sigma0)
-        variation to be tolerated, downward density variation exceeding this will be flagged as FAIL.
+    Parameters
+    ----------
+    inp
+        Potential density values as a numeric numpy array or a list of numbers.
+    zinp
+        Corresponding depth/pressure values for each density.
+    suspect_threshold
+        A float value representing a maximum potential density (or sigma0)
+        variation to be tolerated, downward density variation exceeding this will be flagged as SUSPECT.
+    fail_threshold
+        A float value representing a maximum potential density (or sigma0)
+        variation to be tolerated, downward density variation exceeding this will be flagged as FAIL.
 
     Returns
     -------
+    flag_arr
         A masked array of flag values equal in size to that of the input.
 
     """
diff --git a/ioos_qc/utils.py b/ioos_qc/utils.py
index ad0b8e5..a601710 100644
--- a/ioos_qc/utils.py
+++ b/ioos_qc/utils.py
@@ -211,11 +211,12 @@ def check_timestamps(
     proper order and optionally do not have large gaps prior to
     processing the data.
 
-    Args
-    ----
-    times: Input array of timestamps
-    max_time_interval: The interval between values should not exceed this
-        value. [optional]
+    Parameters
+    ----------
+    times
+        Input array of timestamps
+    max_time_interval
+        The interval between values should not exceed this value. [optional]
 
     """
     time_diff = np.diff(times)
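
For quick reference, a minimal usage sketch of three of the QARTOD tests whose docstrings are reformatted above. The function and parameter names follow those docstrings; the sample data, spans, and thresholds are invented purely for illustration.

```python
import numpy as np
from ioos_qc import qartod

# Illustrative series: mostly smooth values with one spike and a flat stretch.
inp = [12.1, 12.2, 12.3, 19.8, 12.4, 12.4, 12.4, 12.4, 12.5]
# Times as seconds since the unix epoch, one observation every 15 minutes.
tinp = np.arange(len(inp)) * 900

# Values outside (0, 30) are flagged FAIL; values outside (5, 25) are flagged SUSPECT.
range_flags = qartod.gross_range_test(inp=inp, fail_span=(0, 30), suspect_span=(5, 25))

# Spikes larger than 2 units are SUSPECT, larger than 5 units are FAIL.
spike_flags = qartod.spike_test(inp=inp, suspect_threshold=2.0, fail_threshold=5.0, method="average")

# Values that stay within a 0.1 tolerance for 1800 s are SUSPECT, for 3600 s are FAIL.
flat_flags = qartod.flat_line_test(inp=inp, tinp=tinp, suspect_threshold=1800, fail_threshold=3600, tolerance=0.1)

# Each test returns a masked array of QARTOD-style flags (1=GOOD, 3=SUSPECT, 4=FAIL, 9=MISSING).
print(range_flags, spike_flags, flat_flags)
```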
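The ClimatologyConfig docstring above lists tspan, vspan, fspan, zspan and period; the sketch below shows how those pieces feed climatology_test. The spans and sample values are made-up assumptions, and the keyword names mirror the docstring rather than a verified call signature.

```python
import numpy as np
import pandas as pd
from ioos_qc import qartod

# One climatology member: for early 2024 and depths of 0-100 m, values between
# 10 and 20 are expected (vspan); values outside 5-25 are outright failures (fspan).
cc = qartod.ClimatologyConfig()
cc.add(
    tspan=(np.datetime64("2024-01-01"), np.datetime64("2024-04-01")),
    vspan=(10, 20),
    fspan=(5, 25),
    zspan=(0, 100),
)

inp = [11.0, 22.0, 30.0]
tinp = pd.DatetimeIndex(["2024-01-15", "2024-02-15", "2024-03-15"])
zinp = np.array([10.0, 10.0, 10.0])

# Roughly: 11.0 passes, 22.0 falls outside vspan (SUSPECT), 30.0 falls outside fspan (FAIL).
flags = qartod.climatology_test(config=cc, inp=inp, tinp=tinp, zinp=zinp)
print(flags)
```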
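Similarly, the argo.speed_test, qartod.location_test, and qartod_compare docstrings above describe per-test trajectory checks and flag aggregation; a short sketch of combining them follows. Coordinates, times, and thresholds are illustrative assumptions only.

```python
import numpy as np
import pandas as pd
from ioos_qc import argo, qartod

# A three-point hourly trajectory; the last jump is implausibly fast for a drifter.
lon = [-71.00, -71.01, -75.00]
lat = [41.00, 41.01, 44.00]
tinp = pd.DatetimeIndex(["2024-06-01T00:00", "2024-06-01T01:00", "2024-06-01T02:00"])

# Platform speeds above 3 m/s are SUSPECT and above 10 m/s are FAIL (illustrative thresholds).
speed_flags = argo.speed_test(lon=lon, lat=lat, tinp=tinp, suspect_threshold=3.0, fail_threshold=10.0)

# Basic global lon/lat sanity check on the same positions.
loc_flags = qartod.location_test(lon=lon, lat=lat)

# Aggregate the individual flag vectors into a single vector by precedence.
combined = qartod.qartod_compare([speed_flags, loc_flags])
print(np.asarray(combined))
```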