From d43be9594d82af127edd9d79d82c45e7dd9cd8ca Mon Sep 17 00:00:00 2001 From: Kishan Savant Date: Wed, 8 Oct 2025 23:19:41 +0530 Subject: [PATCH 1/4] Updated to Google style docstring --- cdippy/stndata.py | 152 ++++++++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 66 deletions(-) diff --git a/cdippy/stndata.py b/cdippy/stndata.py index fcfec6f..0ceee3a 100644 --- a/cdippy/stndata.py +++ b/cdippy/stndata.py @@ -101,19 +101,13 @@ class StnData(CDIPnc): def __init__( self, stn: str, data_dir: str = None, org: str = None, deploy_num: int = None ): - """ - PARAMETERS - ---------- - stn : str - Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2 - data_dir : str [optional] - Either a full path to a directory containing a local directory hierarchy - of nc files. E.g. '/project/WNC' or a url to a THREDDS server. - org: str - (Organization) Values are: cdip|ww3|external - deploy_num : int [optional] - Supply this to access specific station deployment data. - Must be >= 1. + """Initializes StnData for a given CDIP station. + + Args: + stn (str): Station identifier in 2, 3, or 5 character format (e.g. "28", "028", "028p2"). + data_dir (str, optional): Path to directory containing netCDF files or a THREDDS URL. + org (str): Data organization, one of {"cdip", "ww3", "external"}. + deploy_num (int, optional): Deployment number (>=1) to access specific station deployment data. """ self.nc = None self.stn = stn @@ -151,7 +145,11 @@ def __init__( return None def get_stn_meta(self) -> dict: - """Returns a dict of station meta data.""" + """Returns a dictionary of station metadata. + + Returns: + dict: A dictionary containing metadata variables and global attributes. + """ result = {} if self.meta is None: return result @@ -170,7 +168,18 @@ def get_parameters( apply_mask=True, target_records=0, ) -> dict: - """Calls get_series to return wave parameters.""" + """Returns wave parameter data using get_series. 
+ + Args: + start (datetime, optional): Start time of data request (UTC). + end (datetime, optional): End time of data request (UTC). + pub_set (str, optional): Data quality filter. One of {"public", "nonpub", "all"}. Defaults to "public". + apply_mask (bool, optional): Whether to apply mask filtering. Defaults to True. + target_records (int, optional): Number of records to return if end is not specified. + + Returns: + dict: Dictionary of wave parameter data arrays. + """ return self.get_series( start, end, self.parameter_vars, pub_set, apply_mask, target_records ) @@ -178,7 +187,16 @@ def get_parameters( def get_xyz( self, start: datetime = None, end: datetime = None, pub_set: str = "public" ) -> dict: - """Calls get_series to return displacement data.""" + """Returns displacement (XYZ) data using get_series. + + Args: + start (datetime, optional): Start time of data request (UTC). + end (datetime, optional): End time of data request (UTC). + pub_set (str, optional): Data quality filter. Defaults to "public". + + Returns: + dict: Dictionary of XYZ displacement data. + """ return self.get_series(start, end, self.xyz_vars, pub_set) def get_spectra( @@ -190,7 +208,19 @@ def get_spectra( target_records: int = 0, force_64bands: bool = False, ) -> dict: - """Calls get_series to return spectral data.""" + """Returns spectral data using get_series. + + Args: + start (datetime, optional): Start time of data request (UTC). + end (datetime, optional): End time of data request (UTC). + pub_set (str, optional): Data quality filter. Defaults to "public". + apply_mask (bool, optional): Whether to apply mask filtering. Defaults to True. + target_records (int, optional): Number of records to return if end is not specified. + force_64bands (bool, optional): If True, converts all spectra to 64-band format. + + Returns: + dict: Dictionary of spectral data arrays. 
+ """ return self.get_series( start, end, @@ -211,36 +241,21 @@ def get_series( target_records: int = 0, force_64bands: bool = False, ) -> dict: + """Returns data for a station between specified start and end dates. + + Args: + start (datetime or str, optional): Start time of data request (UTC). + end (datetime or str, optional): End time of data request (UTC). + vrs (list, optional): List of variable names to retrieve. + pub_set (str, optional): Data quality filter. One of {"public", "nonpub", "all"}. + apply_mask (bool, optional): Whether to apply mask filtering. + target_records (int, optional): Number of records to return when end is None. + force_64bands (bool, optional): Whether to force conversion of spectra to 64 bands. + + Returns: + dict: Dictionary of requested variable arrays. """ - Returns a dict of data between start and end dates with specified quality. - - PARAMETERS - ---------- - start : str or datetime [optional] : default Jan 1, 1975 - Start time of data request (UTC). If provided as a string must - be in the format Y-m-d H:M:S where Y is 4 chars and all others - are 2 chars. Ex. '2020-03-30 19:32:56'. - end : str or datetime [optional] : default now - End time of data request (UTC). If not supplied defaults to now. - vrs : list [optional] : default ['waveHs'] - A list of the names of variables to retrieve. They all must start - with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp'] - pub_set: str [optional] values = public|nonpub|all - Filters data based on data quality flags. - apply_mask: bool [optional] default True - Removes values from the masked array that have a mask value of True. - Ex. If nonpub data is requested and apply_mask is False, the returned - array will contain both public and nonpublic data (although public - data records will have the mask value set to True). If apply_mask - is set to True, only nonpub records will be returned. 
- target_records: int [optional] - If start is specified and end is None, this will specify the number - of additional records to return closest to start. - force_64bands: bool [optional] - For the case in which all spectra returned are mk4 100 band format, - force the conversion to 64bands. Mixed formats are always returned in mk3 - 64 band format. - """ + if vrs is None: vrs = self.parameter_vars prefix = self.get_var_prefix(vrs[0]) @@ -363,7 +378,14 @@ def __merge_xyz_helper( return result, start_stamp def remove_duplicates(self, data_dict: dict) -> dict: - """Duplicate records may exist after merge_ routines. This removes them.""" + """Removes duplicate records after merging multiple datasets. + + Args: + data_dict (dict): Dictionary of merged data arrays. + + Returns: + dict: Dictionary with duplicate records removed. + """ result = {} keys = list(data_dict.keys()) if len(keys) > 0: @@ -505,7 +527,14 @@ def __merge_request(self): return result def get_nc_files(self, types: list = nc_file_types) -> dict: - """Returns dict of netCDF4 objects of a station's netcdf files""" + """Returns all available netCDF files for a station. + + Args: + types (list, optional): List of file types to include. Defaults to all nc_file_types. + + Returns: + dict: Dictionary mapping filenames to netCDF objects. + """ result = {} for ftype in types: if ftype == "historic": @@ -531,24 +560,15 @@ def get_nc_files(self, types: list = nc_file_types) -> dict: def get_target_timespan( self, target_timestamp: int, num_target_records: int, time_var: str ) -> tuple: - """Returns a timespan containing the n closest records to the target_timestamp. - - PARAMETERS - ---------- - target_timestamp : int - A unix timestamp which is the target time about which the closest - n records will be returned. - n : int - The number of records to return that are closest to the target - timestamp. - time_var : str - The name of the time dimension variable to use. E.g. waveTime. 
- - RETURNS - ------- - A 2-tuple of timestamps corresponding to i and i+n (where n may - be negative) which will be the timestamps for the n records - closest to the target_timestamp. + """Finds a timespan containing the n records closest to a target timestamp. + + Args: + target_timestamp (int): Target UNIX timestamp. + num_target_records (int): Number of records to return. + time_var (str): Name of time variable (e.g. "waveTime"). + + Returns: + tuple: (start_timestamp, end_timestamp, direction), or (None, None, None) if not found. """ r_ok = False if self.realtime.nc is not None: From d38bf70412cf76b1f8baa08485bb7162a04564da Mon Sep 17 00:00:00 2001 From: Kishan Savant Date: Thu, 9 Oct 2025 23:30:36 +0530 Subject: [PATCH 2/4] Updated docstring to google style for cdipnc.py --- cdippy/cdipnc.py | 435 +++++++++++++++++++++++++++++++---------------- 1 file changed, 287 insertions(+), 148 deletions(-) diff --git a/cdippy/cdipnc.py b/cdippy/cdipnc.py index 3b14e4c..69cd0bf 100644 --- a/cdippy/cdipnc.py +++ b/cdippy/cdipnc.py @@ -103,16 +103,14 @@ class CDIPnc: # 5. Apply the mask if self.apply_mask set True. def __init__(self, data_dir: str = None, deployment: int = None): - """PARAMETERS - ---------- - data_dir : str [optional] - Either a full path to a directory containing a local directory hierarchy - of nc files. E.g. '/project/WNC' or a url to a THREDDS server. - deployment : int [optional] - Supply this to access specific station deployment data. - Must be >= 1. - """ + """Initialize a CDIPnc instance. + Args: + data_dir (str, optional): Full path to a directory containing local nc files + or a URL to a THREDDS server. Examples: '/project/WNC' or THREDDS URL. + deployment (int, optional): Station deployment number (>=1) to access specific data. 
+ """ + self.nc = None self.data_dir = data_dir self.deployment = deployment @@ -125,27 +123,15 @@ def set_request_info( pub_set: str = "public", apply_mask: bool = True, ) -> None: - """Initializes data request information for get_request. - - PARAMETERS - ---------- - start : str or datetime [optional] : default Jan 1, 1975 - Start time of data request (UTC). If provided as a string must - be in the format Y-m-d H:M:S where Y is 4 chars and all others - are 2 chars. Ex. '2020-03-30 19:32:56'. - end : str or datetime [optional] : default now - End time of data request (UTC). If not supplied defaults to now. - vrs : list [optional] : default ['waveHs'] - A list of the names of variables to retrieve. They all must start - with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp'] - pub_set: str [optional] values = public|nonpub|all - Filters data based on data quality flags. - apply_mask: bool [optional] default True - Removes values from the masked array that have a mask value of True. - Ex. If nonpub data is requested and apply_mask is False, the returned - array will contain both public and nonpublic data (although public - data records will have the mask value set to True). If apply_mask - is set to True, only nonpub records will be returned. + """Initialize data request parameters for get_request. + + Args: + start (datetime or str, optional): Start time of data request (UTC). Defaults to + Jan 1, 1975. String format must be 'YYYY-MM-DD HH:MM:SS'. + end (datetime or str, optional): End time of data request (UTC). Defaults to now. + vrs (list, optional): List of variable names to retrieve. Defaults to ['waveHs']. + pub_set (str, optional): Filter by data quality. Options: 'public', 'nonpub', 'all'. + apply_mask (bool, optional): If True, removes masked values. Defaults to True. 
""" if start is None: start = datetime(1975, 1, 1).replace(tzinfo=timezone.utc) @@ -158,7 +144,14 @@ def set_request_info( self.vrs = vrs def set_timespan(self, start, end): - """Sets request timespan""" + """Set the start and end times for the data request. + + Converts string inputs to datetime objects with UTC timezone. + + Args: + start (datetime or str): Start time. + end (datetime or str): End time. + """ if isinstance(start, str): self.start_dt = datetime.strptime(start, "%Y-%m-%d %H:%M:%S").replace( tzinfo=timezone.utc @@ -176,15 +169,12 @@ def set_timespan(self, start, end): self.end_stamp = cdip_utils.datetime_to_timestamp(self.end_dt) def get_request(self) -> dict: - """Returns the data specified using set_request_info. - - RETURNS - ------- - A dictionary containing keys of the requested variables each - of which is a numpy masked array of data values. In addition, - the time values are returned as well. For example, if waveHs - was requested, the dictionary will look like this: - {'waveHs': , 'waveTime': } + """Retrieve requested data as a dictionary of masked arrays. + + Returns: + dict: Keys are variable names and time arrays. Values are numpy masked arrays + of data values. + Example: {'waveHs': , 'waveTime': } """ mask_results = {} save = {} @@ -292,8 +282,16 @@ def __make_masked_array( return arr def make_pub_mask(self, anc_name: str, s_idx: int, e_idx: int) -> np.ndarray: - """Returns an np.ndarray of bools given pub_set and ancillary var""" + """Generate a boolean mask array based on the publication flag. + + Args: + anc_name (str): Name of the ancillary variable. + s_idx (int): Start index. + e_idx (int): End index. + Returns: + np.ndarray: Boolean array for public/non-public filtering. + """ # No s_idx, use whole array. Otherwise time subset the anc var. 
nc_primary = self.get_var(anc_name) if s_idx is None: @@ -325,9 +323,13 @@ def make_pub_mask(self, anc_name: str, s_idx: int, e_idx: int) -> np.ndarray: return np.ma.make_mask(primary_flag_values < 0, shrink=False) def get_pub_set(self, name: str) -> str: - """Returns either 'public', 'nonpub' or 'all'. + """Standardize publication set name. - Maintains backwards compatibility with prior pub_set names. + Args: + name (str): Input publication set name. + + Returns: + str: One of 'public', 'nonpub', or 'all'. """ if name is None or name not in self.pub_set_map.keys(): return self.pub_set_default @@ -335,7 +337,14 @@ def get_pub_set(self, name: str) -> str: return self.pub_set_map[name] def get_var_prefix(self, var_name: str) -> str: - """Returns 'wave' part of the string 'waveHs'.""" + """Extract prefix from a variable name (e.g., 'wave' from 'waveHs'). + + Args: + var_name (str): Variable name. + + Returns: + str: Prefix string. + """ s = "" for c in var_name: if c.isupper(): @@ -344,11 +353,25 @@ def get_var_prefix(self, var_name: str) -> str: return s def get_flag_meanings(self, flag_name: str) -> list: - """Returns flag category values and meanings given a flag_name.""" + """Get flag meanings for a variable. + + Args: + flag_name (str): Flag variable name. + + Returns: + list: List of flag meanings. + """ return self.get_var(flag_name).flag_meanings.split(" ") def get_flag_values(self, flag_name: str) -> list: - """Returns flag category values and meanings given a flag_name.""" + """Get flag values for a variable. + + Args: + flag_name (str): Flag variable name. + + Returns: + list: List of flag values. + """ v = self.get_var(flag_name) if flag_name[0:3] == "gps": return v.flag_masks @@ -356,15 +379,27 @@ def get_flag_values(self, flag_name: str) -> list: return v.flag_values def get_date_modified(self) -> datetime: - """Returns the time the nc file was last modified.""" + """Get the last modification date of the NetCDF file. 
+ + Returns: + datetime: Date and time the file was last modified. + """ return datetime.strptime(self.nc.date_modified, "%Y-%m-%dT%H:%M:%SZ") def get_coverage_start(self) -> datetime: - """Returns the start time of the nc file data coverage.""" + """Get the start time of data coverage in the NetCDF file. + + Returns: + datetime: Start of coverage. + """ return datetime.strptime(self.nc.time_coverage_start, "%Y-%m-%dT%H:%M:%SZ") def get_coverage_end(self) -> datetime: - """Returns the end time of the nc file data coverage.""" + """Get the end time of data coverage in the NetCDF file. + + Returns: + datetime: End of coverage. + """ return datetime.strptime(self.nc.time_coverage_end, "%Y-%m-%dT%H:%M:%SZ") def __get_indices(self, times: list, start_stamp: int, end_stamp: int) -> tuple: @@ -375,6 +410,15 @@ def __get_indices(self, times: list, start_stamp: int, end_stamp: int) -> tuple: return s_idx, e_idx def get_nc(self, url: str = None, retry: bool = False) -> netCDF4.Dataset: + """Open a NetCDF dataset, optionally retrying on failure. + + Args: + url (str, optional): Path or URL to NetCDF file. Defaults to self.url. + retry (bool, optional): Retry once if opening fails. + + Returns: + netCDF4.Dataset or None: Dataset object if successful, else None. + """ if not url: url = self.url try: @@ -395,6 +439,14 @@ def get_nc(self, url: str = None, retry: bool = False) -> netCDF4.Dataset: return None def byte_arr_to_string(self, b_arr: np.ma.masked_array) -> str: + """Convert a masked array of bytes to a string. + + Args: + b_arr (np.ma.masked_array): Masked array of bytes. + + Returns: + str: Concatenated string. + """ if np.ma.is_masked(b_arr): b_arr = b_arr[~b_arr.mask] s = "" @@ -403,26 +455,34 @@ def byte_arr_to_string(self, b_arr: np.ma.masked_array) -> str: return s def metaStationName(self) -> str: - """Returns the metaStationName.""" + """Get the station name from metadata. + + Returns: + str: Station name, or None if dataset not loaded. 
+ """ if self.nc is None: return None return self.byte_arr_to_string(self.nc.variables["metaStationName"][:]) def get_var(self, var_name: str): - """Checks if a variable exists then returns a pointer to it.""" + """Return the NetCDF variable object if it exists. + + Args: + var_name (str): Variable name. + + Returns: + netCDF4.Variable or None: Variable object or None. + """ if self.nc is None or var_name not in self.nc.variables: return None return self.nc.variables[var_name] def get_dataset_urls(self) -> dict: - """Returns a dict of two lists of urls (or paths) to all CDIP station datasets. + """Retrieve URLs or paths for all CDIP datasets for the station. - The top level keys are 'realtime' and 'historic'. The urls are retrieved by - either descending into the THREDDS catalog.xml or recursively walking through data_dir sub - directories. - - For applications that need to use the data from multiple deployment files for - a station, stndata:get_nc_files will load those files efficiently. + Returns: + dict: Dictionary with keys 'realtime' and 'archive', each containing + a list of dataset URLs or local paths. """ if self.data_dir is not None: result = {"realtime": [], "archive": []} @@ -478,43 +538,48 @@ def get_dataset_urls(self) -> dict: def set_dataset_info( self, stn: str, org: str, dataset_name: str, deployment: int = None ) -> None: - """Sets self.stn, org, filename, url and loads self.nc. The key to understanding all of - this is that we are ultimately setting _url_, which can be an actual path to the - nc file or a url to THREDDS DoDS service. - - PARAMETERS - ---------- - stn : str - Can be in 3char (e.g. 028) or 5char (e.g. 028p2) format for org=cdip - org: str - (Organization) Values are: cdip|ww3|external - dataset_name : str - Values: realtime|historic|archive|realtimexy|archivexy| - predeploy|moored|offsite|recovered - deployment : int [optional] - Supply this to access specific station deployment data. - Must be >= 1. 
- - Paths are: - /EXTERNAL/WW3/ [filename=__.nc][CDIP stn like 192w3] - /REALTIME/ [filename=_rt.nc] - /REALTIME/ [filename=_xy.nc] - /ARCHIVE// [filename=_.nc] - /PREDEPLOY// [filename=__rt.nc]** - /PREDEPLOY// [filename=__xy.nc]** - - **Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2) and RECOVERED (p3) - pX = p0|p1|p2|p3; deployment = dXX e.g. d01 - - Urls are: - http://thredds.cdip.ucsd/thredds/dodsC/// - [org1=external|cdip,org_dir=WW3|OWI etc] - http://thredds.cdip.ucsd/thredds/dodsC/// - - Note: - Since adding dataset_name, we no longer need the 5char stn id - for org=cdip datasets. The p_val will be 'p1' for every dataset except - active datasets in buoy states predeploy (p0), offsite (p2) and recovered (p3). + """ + Sets `self.stn`, `self.org`, `self.filename`, `self.url` and loads `self.nc`. + The key to understanding all of this is that we are ultimately setting `_url_`, + which can be an actual path to the nc file or a URL to a THREDDS DODS service. + + Args: + stn (str): Station identifier. Can be in 3-char (e.g., "028") or + 5-char (e.g., "028p2") format for org="cdip". + org (str): Organization. Values are "cdip", "ww3", or "external". + dataset_name (str): Dataset name. Values include: + "realtime", "historic", "archive", "realtimexy", "archivexy", + "predeploy", "moored", "offsite", "recovered". + deployment (int, optional): Supply this to access specific station deployment data. + Must be >= 1. + + Notes: + Paths: + - /EXTERNAL/WW3/ + [filename = __.nc] (CDIP station like 192w3) + - /REALTIME/ + [filename = _rt.nc] + - /REALTIME/ + [filename = _xy.nc] + - /ARCHIVE// + [filename = _.nc] + - /PREDEPLOY// + [filename = __rt.nc] + - /PREDEPLOY// + [filename = __xy.nc] + + Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2), and RECOVERED (p3). + Here, pX = p0|p1|p2|p3; deployment = dXX (e.g., d01). 
+ + URLs: + - http://thredds.cdip.ucsd/thredds/dodsC/// + [org1 = external|cdip, org_dir = WW3|OWI etc] + - http://thredds.cdip.ucsd/thredds/dodsC/// + + Note: + Since adding `dataset_name`, we no longer need the 5-char station ID + for org="cdip" datasets. The `p_val` will be "p1" for every dataset except + active datasets in buoy states: predeploy (p0), offsite (p2), and recovered (p3). """ ext = ".nc" @@ -591,19 +656,17 @@ def set_dataset_info( self.org = org self.nc = self.get_nc() - class Latest(CDIPnc): - """Loads the latest_3day.nc and has methods for retrieving the data.""" + """Access the latest 3-day CDIP dataset with retrieval methods for all metadata and variables.""" # Do not apply the mask to get_request calls. apply_mask = False def __init__(self, data_dir: str = None): - """PARAMETERS - ---------- - data_dir : str [optional] - Either a full path to a directory containing a local directory hierarchy - of nc files. E.g. '/project/WNC' or a url to a THREDDS server. + """Initialize Latest instance and load latest_3day.nc. + + Args: + data_dir (str, optional): Local path or THREDDS URL. """ CDIPnc.__init__(self, data_dir) @@ -625,7 +688,11 @@ def __init__(self, data_dir: str = None): self.nc = self.get_nc(self.url) def metaStationNames(self) -> list: - """Get list of latest station names.""" + """Retrieve a list of station names from the latest dataset. + + Returns: + list: Station names. + """ if self.nc is None: return None names = [] @@ -634,7 +701,11 @@ def metaStationNames(self) -> list: return names def metaSiteLabels(self) -> list: - """Sets and returns self.labels, a list of station labels, e.g. ['100p1',...].""" + """Retrieve site labels for the stations (e.g., '100p1'). + + Returns: + list: Site labels. 
+ """ if self.nc is None: return None for label_arr in self.nc.variables["metaSiteLabel"]: @@ -642,7 +713,11 @@ def metaSiteLabels(self) -> list: return self.labels def metaDeployLabels(self) -> list: - """Returns a list of metaDeployLabels.""" + """Retrieve deployment labels for the stations. + + Returns: + list: Deployment labels. + """ if self.nc is None: return None labels = [] @@ -651,7 +726,11 @@ def metaDeployLabels(self) -> list: return labels def metaDeployNumbers(self) -> list: - """Returns a list of metaDeployNumbers.""" + """Retrieve deployment numbers for the stations. + + Returns: + list: Deployment numbers. + """ if self.nc is None: return None numbers = [] @@ -660,7 +739,11 @@ def metaDeployNumbers(self) -> list: return numbers def metaWMOids(self) -> list: - """Returns a list of WMO ids, e.g. ['46225',...].""" + """Retrieve WMO IDs for the stations. + + Returns: + list: WMO IDs. + """ if self.nc is None: return None labels = [] @@ -669,7 +752,11 @@ def metaWMOids(self) -> list: return labels def metaLatitudes(self) -> list: - """Returns a list of station latitudes, e.g. [23.4,...].""" + """Retrieve latitudes for the stations. + + Returns: + list: Latitudes. + """ if self.nc is None: return None lats = [] @@ -678,7 +765,11 @@ def metaLatitudes(self) -> list: return lats def metaLongitudes(self) -> list: - """Returns a list of station longitudes, e.g. [23.4,...].""" + """Retrieve longitudes for the stations. + + Returns: + list: Longitudes. + """ if self.nc is None: return None lons = [] @@ -687,7 +778,11 @@ def metaLongitudes(self) -> list: return lons def metaWaterDepths(self) -> list: - """Returns a list of station water depths.""" + """Retrieve water depths for the stations. + + Returns: + list: Water depths. 
+ """ if self.nc is None: return None depths = [] @@ -702,20 +797,24 @@ def get_latest( params: list = None, array_format=True, ) -> list: - """ - By default, array_format = True, it will return a dictionary of numpy masked - arrays of the latest requested parameters as well as metadata information. - - If array_format = False, it returns a list of dicts. Each dict will contain - latest station data and metadata. - - Parameter data values that are masked or non-existant are set to np.nan. - Time values (e.g. 'waveTime') for the wave data if masked or non-existant - are set to None. - - Both meta_vars and params if None (or not included in the argument list) will - return default sets of meta_vars and parameters. If meta_vars and params are set - just those will be returned. + """Retrieve the latest station data and metadata. + + Args: + pub_set (str, optional): Data quality filter. Options are 'public', 'nonpub', or 'all'. Defaults to 'public'. + meta_vars (list, optional): List of metadata variable names to include. If None, uses default metadata variables. + params (list, optional): List of parameter variable names to include. If None, uses default parameters. + array_format (bool, optional): If True, returns a dictionary of numpy masked arrays for each variable. + If False, returns a list of dictionaries, one per station. Defaults to True. + + Returns: + dict or list: If array_format is True, returns a dictionary where keys are variable names and values are numpy masked arrays. + If array_format is False, returns a list of dictionaries, each containing latest station data and metadata. + + Notes: + - Parameter data values that are masked or non-existent are set to np.nan. + - Time values (e.g., 'waveTime') for wave data that are masked or non-existent are set to None. + - If both meta_vars and params are None, default sets are used. + - If meta_vars and params are provided, only those variables are returned. 
""" # Use these if params (or meta_vars) is None @@ -901,18 +1000,14 @@ def __init__( org: str = None, ): """ - PARAMETERS - ---------- - stn : str - Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2 - active_state_key : str - Values: predeploy|moored|offsite|recovered - deployment : int [optional] - Supply this to access specific station deployment data. - Must be >= 1. - data_dir : str [optional] - Either a full path to a directory containing a local directory hierarchy - of nc files. E.g. '/project/WNC' or a url to a THREDDS server. + Initialize Active dataset. + + Args: + stn (str): Station identifier (2, 3, or 5 char format, e.g., '28', '028', '028p2'). + deployment (int): Deployment number (>=1) to access specific station deployment data. + active_state_key (str): One of 'predeploy', 'moored', 'offsite', 'recovered'. + data_dir (str, optional): Full path to local directory hierarchy of nc files or THREDDS server URL. + org (str, optional): Organization identifier. """ CDIPnc.__init__(self, data_dir) self.set_dataset_info(stn, org, active_state_key, deployment) @@ -923,7 +1018,14 @@ class Realtime(CDIPnc): """Loads the realtime nc file for the given station.""" def __init__(self, stn: str, data_dir: str = None, org: str = None): - """For parameters: See CDIPnc.set_dataset_info.""" + """ + Initialize Realtime dataset. + + Args: + stn (str): Station identifier. + data_dir (str, optional): Directory containing NetCDF files or URL to THREDDS server. + org (str, optional): Organization identifier. + """ CDIPnc.__init__(self, data_dir) self.set_dataset_info(stn, org, "realtime") @@ -932,8 +1034,14 @@ class Historic(CDIPnc): """Loads the historic nc file for a given station.""" def __init__(self, stn, data_dir=None, org=None): - """For parameters see CDIPnc.set_dataset_info.""" + """ + Initialize Historic dataset. + Args: + stn (str): Station identifier. + data_dir (str, optional): Directory containing NetCDF files or URL to THREDDS server. 
+ org (str, optional): Organization identifier. + """ CDIPnc.__init__(self, data_dir) self.set_dataset_info(stn, org, "historic") @@ -945,7 +1053,15 @@ class Archive(CDIPnc): """Loads an archive (deployment) file for a given station and deployment.""" def __init__(self, stn, deployment=None, data_dir=None, org=None): - """For parameters see CDIPnc.set_dataset_info.""" + """ + Initialize Archive dataset. + + Args: + stn (str): Station identifier. + deployment (int or str, optional): Deployment number or string identifier (e.g., 'd02'). Defaults to 1. + data_dir (str, optional): Directory containing NetCDF files or URL to THREDDS server. + org (str, optional): Organization identifier. + """ CDIPnc.__init__(self, data_dir) if not deployment: deployment = 1 @@ -970,7 +1086,15 @@ def __make_xyzTime(self, start_idx: int, end_idx: int) -> int: return t0 - d + i / r def get_xyz_timestamp(self, xyzIndex: int) -> int: - """Returns the timestamp corresponding to the given xyz array index.""" + """ + Get the timestamp corresponding to a given xyz array index. + + Args: + xyzIndex (int): Index in xyz array. + + Returns: + int or None: Timestamp for the xyz index, or None if unavailable. + """ t0 = self.get_var("xyzStartTime")[0] r = self.get_var("xyzSampleRate")[0] # Mark I will have filter delay set to fill value @@ -982,8 +1106,14 @@ def get_xyz_timestamp(self, xyzIndex: int) -> int: return None def get_request(self): - """Overrides the base class method to handle xyz data requests.""" + """ + Retrieve requested data from the dataset. + + Overrides base class method to handle xyz data requests. + Returns: + dict: Dictionary of requested data arrays. + """ # If not an xyz request, use base class version if self.get_var_prefix(self.vrs[0]) != "xyz": return super(Archive, self).get_request() @@ -1019,12 +1149,14 @@ class ActiveXY(Archive): def __init__(self, stn, deployment, dataset, data_dir=None, org=None): """ - PARAMETERS - ---------- - dataset : str - Active dataset name. 
- Values are: predeploy|moored|offsite|recovered. - For other parameters see CDIPnc.set_dataset_info. + Initialize ActiveXY dataset. + + Args: + stn (str): Station identifier. + deployment (int or str): Deployment number or string identifier. + dataset (str): Active dataset name ('predeploy', 'moored', 'offsite', 'recovered'). + data_dir (str, optional): Directory containing NetCDF files or URL to THREDDS server. + org (str, optional): Organization identifier. """ CDIPnc.__init__(self, data_dir) self.set_dataset_info(stn, org, dataset + "xy", deployment) @@ -1035,6 +1167,13 @@ class RealtimeXY(Archive): """Loads the realtime xy nc file for the given station.""" def __init__(self, stn, data_dir=None, org=None): - """For parameters see CDIPnc.set_dataset_info.""" + """ + Initialize RealtimeXY dataset. + + Args: + stn (str): Station identifier. + data_dir (str, optional): Directory containing NetCDF files or URL to THREDDS server. + org (str, optional): Organization identifier. + """ CDIPnc.__init__(self, data_dir) self.set_dataset_info(stn, org, "realtimexy") From 56c95c098a55a396fdfde03b527e59e4491470c6 Mon Sep 17 00:00:00 2001 From: Kishan Savant Date: Thu, 9 Oct 2025 23:44:51 +0530 Subject: [PATCH 3/4] Fixed trailing whitespaces and blank lines --- cdippy/cdipnc.py | 37 ++++++++++++++++--------------------- cdippy/stndata.py | 1 - 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/cdippy/cdipnc.py b/cdippy/cdipnc.py index 69cd0bf..de29226 100644 --- a/cdippy/cdipnc.py +++ b/cdippy/cdipnc.py @@ -110,7 +110,6 @@ def __init__(self, data_dir: str = None, deployment: int = None): or a URL to a THREDDS server. Examples: '/project/WNC' or THREDDS URL. deployment (int, optional): Station deployment number (>=1) to access specific data. """ - self.nc = None self.data_dir = data_dir self.deployment = deployment @@ -472,7 +471,7 @@ def get_var(self, var_name: str): Returns: netCDF4.Variable or None: Variable object or None. 
- """ + """ if self.nc is None or var_name not in self.nc.variables: return None return self.nc.variables[var_name] @@ -544,41 +543,38 @@ def set_dataset_info( which can be an actual path to the nc file or a URL to a THREDDS DODS service. Args: - stn (str): Station identifier. Can be in 3-char (e.g., "028") or - 5-char (e.g., "028p2") format for org="cdip". + stn (str): Station identifier. Can be in 3-char (e.g., "028") or 5-char (e.g., "028p2") format for org="cdip". org (str): Organization. Values are "cdip", "ww3", or "external". - dataset_name (str): Dataset name. Values include: - "realtime", "historic", "archive", "realtimexy", "archivexy", - "predeploy", "moored", "offsite", "recovered". - deployment (int, optional): Supply this to access specific station deployment data. - Must be >= 1. + dataset_name (str): Dataset name. Values include: "realtime", "historic", "archive", "realtimexy", "archivexy", + "predeploy", "moored", "offsite", "recovered". + deployment (int, optional): Supply this to access specific station deployment data. Must be >= 1. Notes: Paths: - - /EXTERNAL/WW3/ + - /EXTERNAL/WW3/ [filename = __.nc] (CDIP station like 192w3) - - /REALTIME/ + - /REALTIME/ [filename = _rt.nc] - - /REALTIME/ + - /REALTIME/ [filename = _xy.nc] - - /ARCHIVE// + - /ARCHIVE// [filename = _.nc] - - /PREDEPLOY// + - /PREDEPLOY// [filename = __rt.nc] - - /PREDEPLOY// + - /PREDEPLOY// [filename = __xy.nc] - Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2), and RECOVERED (p3). + Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2), and RECOVERED (p3). Here, pX = p0|p1|p2|p3; deployment = dXX (e.g., d01). URLs: - - http://thredds.cdip.ucsd/thredds/dodsC/// + - http://thredds.cdip.ucsd/thredds/dodsC/// [org1 = external|cdip, org_dir = WW3|OWI etc] - http://thredds.cdip.ucsd/thredds/dodsC/// Note: - Since adding `dataset_name`, we no longer need the 5-char station ID - for org="cdip" datasets. 
The `p_val` will be "p1" for every dataset except + Since adding `dataset_name`, we no longer need the 5-char station ID + for org="cdip" datasets. The `p_val` will be "p1" for every dataset except active datasets in buoy states: predeploy (p0), offsite (p2), and recovered (p3). """ ext = ".nc" @@ -668,7 +664,6 @@ def __init__(self, data_dir: str = None): Args: data_dir (str, optional): Local path or THREDDS URL. """ - CDIPnc.__init__(self, data_dir) self.labels = [] # - Holds stn labels, e.g. '100p1' for this instance # Set latest timespan (Latest_3day goes up to 30 minutes beyond now) @@ -705,7 +700,7 @@ def metaSiteLabels(self) -> list: Returns: list: Site labels. - """ + """ if self.nc is None: return None for label_arr in self.nc.variables["metaSiteLabel"]: diff --git a/cdippy/stndata.py b/cdippy/stndata.py index 0ceee3a..05c3d97 100644 --- a/cdippy/stndata.py +++ b/cdippy/stndata.py @@ -255,7 +255,6 @@ def get_series( Returns: dict: Dictionary of requested variable arrays. """ - if vrs is None: vrs = self.parameter_vars prefix = self.get_var_prefix(vrs[0]) From 690320ad41b70025f18232f6a785a4c9c22711b5 Mon Sep 17 00:00:00 2001 From: Kishan Savant Date: Thu, 9 Oct 2025 23:47:31 +0530 Subject: [PATCH 4/4] Fixed rem trailing whitespaces and blank lines --- cdippy/cdipnc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cdippy/cdipnc.py b/cdippy/cdipnc.py index de29226..b642cc7 100644 --- a/cdippy/cdipnc.py +++ b/cdippy/cdipnc.py @@ -471,7 +471,7 @@ def get_var(self, var_name: str): Returns: netCDF4.Variable or None: Variable object or None. - """ + """ if self.nc is None or var_name not in self.nc.variables: return None return self.nc.variables[var_name] @@ -538,8 +538,8 @@ def set_dataset_info( self, stn: str, org: str, dataset_name: str, deployment: int = None ) -> None: """ - Sets `self.stn`, `self.org`, `self.filename`, `self.url` and loads `self.nc`. 
- The key to understanding all of this is that we are ultimately setting `_url_`, + Sets `self.stn`, `self.org`, `self.filename`, `self.url` and loads `self.nc`. + The key to understanding all of this is that we are ultimately setting `_url_`, which can be an actual path to the nc file or a URL to a THREDDS DODS service. Args: @@ -652,6 +652,7 @@ def set_dataset_info( self.org = org self.nc = self.get_nc() + class Latest(CDIPnc): """Access the latest 3-day CDIP dataset with retrieval methods for all metadata and variables."""