From 7f42c4282a0ccce896ce60f18edfa4c834cf5fcf Mon Sep 17 00:00:00 2001 From: Phil Owen <19691521+PhillipsOwen@users.noreply.github.com> Date: Fri, 22 Nov 2024 09:48:17 -0500 Subject: [PATCH] altering log statement --- src/common/geopoints_url.py | 22 +-- src/common/geopoints_urls_from_times.py | 190 +++--------------------- src/common/geopoints_utilities.py | 41 ++--- 3 files changed, 41 insertions(+), 212 deletions(-) diff --git a/src/common/geopoints_url.py b/src/common/geopoints_url.py index a05cd06..8c13a5f 100644 --- a/src/common/geopoints_url.py +++ b/src/common/geopoints_url.py @@ -43,10 +43,10 @@ def __init__(self, app_name='GeoPointsURL.TEST', _logger=None): self.logger = _logger else: # get the log level and directory from the environment. - log_level, log_path = LoggingUtil.prep_for_logging() + __log_level, __log_path = LoggingUtil.prep_for_logging() # create a logger - self.logger = LoggingUtil.init_logging(app_name, level=log_level, line_format='medium', log_file_path=log_path) + self.logger = LoggingUtil.init_logging(app_name, level=__log_level, line_format='medium', log_file_path=__log_path) # Define some basic mappings for URL to variables names. Can override using CI variables self.var_mapper = {'fort': 'zeta', 'swan': 'swan_HS'} @@ -182,10 +182,7 @@ def run(self, args): if n_days <= 0: self.logger.debug('Build list of URLs to fetch: n_days lookback is %s', n_days) - - rpl = GenerateURLsFromTimes(_logger=self.logger, url=url, time_in=None, time_out=None, n_days=n_days, grid_name=None, instance_name=None, - config_name=None) - + rpl = GenerateURLsFromTimes(_logger=self.logger, url=url, time_in=None, time_out=None, n_days=n_days, grid_name=None, instance_name=None) new_urls = rpl.build_url_list_from_template_url_and_offset(ensemble=ensemble) self.logger.info('New URL list %s', new_urls) @@ -250,22 +247,25 @@ def run(self, args): # init the return RET_VAL = 0 + # get the log level and directory from the environment. 
+ log_level, log_path = LoggingUtil.prep_for_logging() + # setup a logger for testing - logger = LoggingUtil.init_logging("GeoPointsURL.test", level=10, line_format='medium', log_file_path='./geopoints_url-test.log') + logger = LoggingUtil.init_logging("GeoPointsURL.test", level=log_level, line_format='medium', log_file_path=log_path) try: from argparse import ArgumentParser parser = ArgumentParser() - parser.add_argument('--lon', action='store', dest='lon', default=None, type=float, help='lon: longitude value for time series extraction') - parser.add_argument('--lat', action='store', dest='lat', default=None, type=float, help='lat: latitude value for time series extraction') + parser.add_argument('--lon', action='store', dest='lon', required=True, type=float, help='lon: longitude value for time series extraction') + parser.add_argument('--lat', action='store', dest='lat', required=True, type=float, help='lat: latitude value for time series extraction') parser.add_argument('--variable_name', action='store', dest='variable_name', default=None, type=str, help='Optional variable name of interest from the supplied url') parser.add_argument('--kmax', action='store', dest='kmax', default=10, type=int, help='nearest_neighbors values when performing the Query') parser.add_argument('--alt_urlsource', action='store', dest='alt_urlsource', default=None, type=str, help='Alternative location for the ADCIRC data - NOTE specific formatting requirements exist') - parser.add_argument('--url', action='store', dest='url', default=None, type=str, help='Specify FQ URL') + parser.add_argument('--url', action='store', dest='url', required=True, type=str, help='Specify FQ URL') parser.add_argument('--keep_headers', action='store_true', default=True, help='Boolean: Indicates to add header names to output files') parser.add_argument('--ensemble', action='store', dest='ensemble', default=None, type=str, help='Choose overriding ensemble such as nowcast. 
Else internal code extracts from the URL') @@ -278,7 +278,7 @@ def run(self, args): logger.debug('Input args: %s', cli_args) # instantiate the geo-point URL class - gp_url = GeoPointsURL(logger) + gp_url = GeoPointsURL(_logger=logger) # Call the runner df_out = gp_url.run(cli_args) diff --git a/src/common/geopoints_urls_from_times.py b/src/common/geopoints_urls_from_times.py index 0d3688e..0d3598e 100644 --- a/src/common/geopoints_urls_from_times.py +++ b/src/common/geopoints_urls_from_times.py @@ -38,38 +38,20 @@ class GenerateURLsFromTimes: As an example, if grabbing a nowcast, the data series may begin 6 hours before the indicated url time. Pass in a URL and the instance, gridname, are scraped from it - If YAML you must specify these terms. - - Possible scenarios: (options such as instance/ensemble can be applied to all) - 1) Input timein/timeout and the config_name YML (nominal name is url_framework.yml). - This will generate a set of URLs between the two time ranges. - This can work for a Hurricane, BUT timein/timeout must be ADVISORY values - 2) Input timeout and offset and the config_name YML (nominal name is url_framework.yml). - This will generate a set of URLs between the two time ranges. - This can work for a Hurricane, BUT timeout must be ADVISORY values - 3) Input URL and offset only. - This will scrape the time/advisory from the URL and offset it in 6-hour steps and - generate a set of URLs between the two time/advisory ranges. - This can work for hurricanes - - starttime is the selected time to begin the building of the list (YYYY-mm-dd HH:MM:SS) - stoptime is the selected time to end the building of the list (YYYY-mm-dd HH:MM:SS) + Former YAML-based URL structural assignments has been deprecated. 
Refer to the original AST codes - Parameters: + starttime is the selected time to begin the building of the list (YYYY-mm-dd HH:MM:SS) + stoptime is the selected time to end the building of the list (YYYY-mm-dd HH:MM:SS) + + Parameters: url: (str) A single URL from which more URLs may be built ndays: (int) Number of look back/ahead days from the stoptime value - config_name: (str) path/filename to yaml file that contains the INSTANCE mappings - hurricane_yaml_source=None: (str) This is a special case. - If you want to build Hurricane URLs from a YAML, then you will need to specify the subdir name directly, e.g. 'al09'. - This will replace the default value of a name. - hurricane_yaml_year: (str) is part of the Hurricane special case. No way to dig out the year directory name without the user specifying it - only needed for YAML based hurricane construction. Eg .../thredds/dodsC/2021/al09/11/ec95d/... """ def __str__(self): return self.__class__.__name__ def __init__(self, _app_name='GenerateURLsFromTimes.TEST', _logger=None, url=None, time_in=None, time_out=None, n_days=None, grid_name=None, - instance_name=None, config_name=None, hurricane_yaml_year=None, hurricane_yaml_source=None): + instance_name=None): # get a handle to a logger self.logger = _logger @@ -80,7 +62,6 @@ def __init__(self, _app_name='GenerateURLsFromTimes.TEST', _logger=None, url=Non # The Hurricane special terms are only usedY if you are requesting to build from a YAML AND the caller wants Hurricane data # If a URL passed in, then gridname and instance can be gotten from it. # ensemble values are expected to be changed by the user - self.config_name = config_name if url is not None: words = url.split('/') self.ensemble = words[-2] @@ -101,9 +82,6 @@ def __init__(self, _app_name='GenerateURLsFromTimes.TEST', _logger=None, url=Non if self.grid_name is None: raise Exception('Must specify a grid_name if building URLs based on a YAML. 
None specified: Abort') - self.hurricane_yaml_source = hurricane_yaml_source - self.hurricane_yaml_year = hurricane_yaml_year # Cannot span multiple years using Hurricane-YAML construction - # timeout MUST be supplied somehow if time_out is None and stop_time is None: raise Exception('timeout is not set and no URL provided: Abort') @@ -150,6 +128,7 @@ def build_url_list_from_template_url_and_times(self, ensemble='nowcast') -> list urls = [] for time, instance in zip(list_of_times, list_of_instances): + self.logger.debug('time: %s, instance: %s', time, instance) words = url.split('/') words[-2] = ensemble words[-3] = self.instance_name @@ -192,6 +171,7 @@ def build_url_list_from_template_url_and_offset(self, ensemble='nowcast') -> lis urls = [] for time, instance in zip(list_of_times, list_of_instances): + self.logger.debug('time: %s, instance: %s', time, instance) words = url.split('/') words[-2] = ensemble words[-3] = self.instance_name @@ -202,117 +182,6 @@ def build_url_list_from_template_url_and_offset(self, ensemble='nowcast') -> lis self.logger.debug('Constructed %s urls of ensemble %s', urls, ensemble) return urls - @staticmethod - def load_config(config_name): - """ - placeholder method to load the config file - - :param config_name: - :return: - """ - return config_name - - # Approach Used by ADDA - def build_url_list_from_yaml_and_times(self, ensemble='nowcast') -> list: - """ - We seek to build a set of compatible URLs spanning the input time range based on the - structure of asgs urls in the config_name. The structure of the output URLs will be based on the - entries in the associated YAML file. Since, no url will be provided, we must ask the caller to provide - the gridname, ensemble, and instance. We expect the caller to provide a proper Instance value - for the new URLs. - We REQUIRE the grid name. Only change in the ensemble and times are expected - - This uses the following class variables: - time_range: (tuple) (datetime, datetime). 
Time range inclusive (could also be hurricane advisories) - instance: (str) if set the used for all urls. If not, attempt to find it in the YAML - gridname: (str) name for the grid - - Parameters: - ensemble: (str) ensemble name (dafaults to nowcast) - - Returns: - urls: list(str). List of valid URLs for processing - """ - config = None - - if self.config_name is None: - raise Exception('self.config_name is None. Cannot use the YAML generators: Abort') - - try: - config = self.load_config(self.config_name) - except FileNotFoundError as e: # OSError: - raise FileNotFoundError(f'No URL structural config yml file found: {self.config_name}: Abort') from e - - time_range = (self.start_time, self.stop_time) # Could also be a range of advisories - list_of_times = self.utils.generate_six_hour_time_steps_from_range(time_range) - list_of_instances = self.utils.generate_list_of_instances(list_of_times, self.grid_name, self.instance_name) - - urls = [] - - self.logger.debug('list_of_times: %s', list_of_times) - self.logger.debug('list_of_instances: %s', list_of_instances) - - for time, instance in zip(list_of_times, list_of_instances): - url = self.utils.construct_url_from_yaml(config, time, self.instance_name, ensemble, self.grid_name, - hurricane_yaml_year=self.hurricane_yaml_year, hurricane_yaml_source=self.hurricane_yaml_source) - if url not in urls: - urls.append(url) - - self.logger.debug('Constructed %s urls of ensemble %s based on the YML', urls, ensemble) - return urls - - # Approach Used by ADDA - def build_url_list_from_yaml_and_offset(self, ensemble='nowcast') -> list: - """ - We seek to build a set of compatible URLs spanning the input time range based on the - structure of asgs urls in the config_name. The structure of the output URLs will be based on the - entries in the associated YAML file. Since, no url will be provided, we must ask the caller to provide - the gridname, ensemble, and instance. 
We expect the caller to provide a proper Instance value - for the new URLs. - We REQUIRE the grid name. Only change in the ensemble and times are expected - - Uses the following class variables: - offset: (int). The offset in days - instance: (str) if set then used for all urls - gridname: (str) name for the grid - ensemble: (str) ensemble name (dafaults to nowcast) - - Parameters: - ensemble: (str) ensemble name (dafaults to nowcast) - - Returns: - urls: list(str). List of valid URLs for processing - """ - config = None - - if self.config_name is None: - raise Exception('self.config_name is None. Cannot use the YAML generators: Abort') - - try: - config = self.load_config(self.config_name) - except OSError as e: - raise OSError(f'No URL structural config yml file {self.config_name} found: Abort') from e - - time_value = self.stop_time # Could also be an advisory - offset = self.n_days - if offset > 0: - self.logger.warning('Offset >0 specified: Behavior is not tested') - - list_of_times = self.utils.generate_six_hour_time_steps_from_offset(time_value, offset) - list_of_instances = self.utils.generate_list_of_instances(list_of_times, self.grid_name, self.instance_name) - - urls = [] - - for time, instance in zip(list_of_times, list_of_instances): - url = self.utils.construct_url_from_yaml(config, time, self.instance_name, ensemble, self.grid_name, - hurricane_yaml_year=self.hurricane_yaml_year, hurricane_yaml_source=self.hurricane_yaml_source) - if url not in urls: - urls.append(url) - - self.logger.warning('Constructed %s urls of ensemble %s based on the YML and offset', urls, ensemble) - - return urls - class GenerateURLsEntry: """ @@ -334,17 +203,18 @@ def __init__(self, _app_name='GenerateURLsEntry.TEST', _logger=None): self.logger = _logger else: # get the log level and directory from the environment. 
- log_level, log_path = LoggingUtil.prep_for_logging() + __log_level, __log_path = LoggingUtil.prep_for_logging() # create a logger - self.logger = LoggingUtil.init_logging(_app_name, level=log_level, line_format='medium', log_file_path=log_path) + self.logger = LoggingUtil.init_logging(_app_name, level=__log_level, line_format='medium', log_file_path=__log_path) def run(self, args): """ A simple main method to demonstrate the use of this class """ - config_name = args.config_name if args.config_name is not None else os.path.join(os.path.dirname(__file__), '../config', 'url_framework.yml') + # init the return + new_urls: list = [] # Set up IO env self.logger.debug("Product Level Working in %s.", os.getcwd()) @@ -361,58 +231,38 @@ def run(self, args): if args.timein is not None: self.logger.debug('Selecting a specific time-range procedure') rpl = GenerateURLsFromTimes(_logger=self.logger, url=args.url, time_in=args.timein, time_out=args.timeout, n_days=None, - grid_name=None, instance_name=None, config_name=None) + grid_name=None, instance_name=None) new_urls = rpl.build_url_list_from_template_url_and_times(ensemble=args.ensemble) else: self.logger.debug('Selecting time+ndays procedure') rpl = GenerateURLsFromTimes(_logger=self.logger, url=args.url, time_in=None, time_out=args.timeout, n_days=args.ndays, grid_name=None, - instance_name=None, config_name=None) + instance_name=None) new_urls = rpl.build_url_list_from_template_url_and_offset(ensemble=args.ensemble) else: - self.logger.debug('Selecting a YAML generation method') - if args.grid_name is None or args.instance_name is None or config_name is None: - raise Exception('YAML-based procedures requires gridname, instance_name and config_name') - if args.hurricane_yaml_year is not None and args.hurricane_yaml_source is not None: - self.logger.debug('Detected values required for building YAML-based Hurricane urls') - if args.timein is not None: - self.logger.debug('Selecting a specific time-range procedure') - rpl = 
GenerateURLsFromTimes(_logger=self.logger, time_in=args.timein, time_out=args.timeout, n_days=None, grid_name=args.grid_name, - instance_name=args.instance_name, config_name=args.config_name, - hurricane_yaml_year=args.hurricane_yaml_year, hurricane_yaml_source=args.hurricane_yaml_source) - new_urls = rpl.build_url_list_from_yaml_and_times(ensemble=args.ensemble) - else: - self.logger.debug('Selecting time+ndays procedure') - rpl = GenerateURLsFromTimes(_logger=self.logger, time_in=None, time_out=args.timeout, n_days=args.ndays, grid_name=args.grid_name, - instance_name=args.instance_name, config_name=args.config_name, - hurricane_yaml_year=args.hurricane_yaml_year, hurricane_yaml_source=args.hurricane_yaml_source) - new_urls = rpl.build_url_list_from_yaml_and_times(ensemble=args.ensemble) + self.logger.debug('No URL was specified') self.logger.debug('New urls: %s', new_urls) if __name__ == '__main__': + # get the log level and directory from the environment. + log_level, log_path = LoggingUtil.prep_for_logging() + # setup a logger for testing - logger = LoggingUtil.init_logging("GenerateURLsFromTimes.test", level=10, line_format='medium', - log_file_path='./geopoints_url_from_times-test.log') + logger = LoggingUtil.init_logging("GenerateURLsFromTimes.test", level=log_level, line_format='medium', log_file_path=log_path) from argparse import ArgumentParser parser = ArgumentParser() - parser.add_argument('--url', default=None, action='store', dest='url', help='Input URL that may be used to build new output urls', type=str) + parser.add_argument('--url', required=True, action='store', dest='url', help='Input URL that may be used to build new output urls', type=str) parser.add_argument('--ndays', default=None, action='store', dest='ndays', help='Day lag (usually < 0)', type=int) parser.add_argument('--timeout', default=None, action='store', dest='timeout', help='YYYY-mm-dd HH:MM:SS. 
Latest day of analysis', type=str) parser.add_argument('--timein', default=None, action='store', dest='timein', help='YYYY-mm-dd HH:MM:SS .Start day of analysis. ', type=str) - parser.add_argument('--config_name', action='store', dest='config_name', default=None, - help='String: yml config which contains URL structural information') parser.add_argument('--instance_name', action='store', dest='instance_name', default=None, help='String: Choose instance name. Required if using a YAML-based URL construction') parser.add_argument('--grid_name', action='store', dest='grid_name', default=None, help='String: Choose grid_name. Required if using a YAML-based URL construction') parser.add_argument('--ensemble', action='store', dest='ensemble', default='nowcast', help='String: Specify ensemble name ') - parser.add_argument('--hurricane_yaml_year', action='store', dest='hurricane_yaml_year', default=None, - help='String: Needed only for Hurricane/YML procedures') - parser.add_argument('--hurricane_yaml_source', action='store', dest='hurricane_yaml_source', default=None, - help='String: Needed only for Hurricane/YML procedures') cli_args = parser.parse_args() diff --git a/src/common/geopoints_utilities.py b/src/common/geopoints_utilities.py index 1cdbbd9..efd7e1a 100644 --- a/src/common/geopoints_utilities.py +++ b/src/common/geopoints_utilities.py @@ -363,6 +363,8 @@ def water_level_selection(self, t, data_list, final_weights): # Index is a loop over multiple possible lon/lat pairs for index, data_series, weights in zip(range(0, len(data_list)), data_list, final_weights): + self.logger.debug('weights: %s', weights ) + df_single = pd.DataFrame(index=t) count = 0 @@ -490,7 +492,7 @@ def combined_pipeline(self, url, variable_name, lon, lat, nearest_neighbors=10): t0 = tm.time() df_product_data = ag_results['final_reduced_data'] - df_product_metadata = ag_results['final_meta_data'] + # df_product_metadata = ag_results['final_meta_data'] df_excluded_geopoints = 
pd.DataFrame(geopoints[ag_results['outside_elements']], index=ag_results['outside_elements'] + 1, columns=['lon', 'lat']) @@ -499,8 +501,7 @@ def combined_pipeline(self, url, variable_name, lon, lat, nearest_neighbors=10): return df_product_data, df_excluded_geopoints # , df_product_metadata - @staticmethod - def is_hurricane(test_val) -> bool: + def is_hurricane(self, test_val) -> bool: """ Determine of the input test val is a Date, an Int or something else Parameters: @@ -514,9 +515,12 @@ def is_hurricane(test_val) -> bool: except (ValueError, TypeError): try: test = dt.datetime.strptime(test_val, '%Y%m%d%H') + self.logger.debug('test: %s', test) except Exception: try: out_id = int(test_val) + self.logger.debug('out_id: %s', out_id) + is_hurricane = True except ValueError as e: raise ValueError(f'test indicates not a hurricane nor a casting. Perhaps a format issue?. Got {test_val}: Abort') from e @@ -714,8 +718,7 @@ def grab_years_from_time_list(list_of_times) -> list: return list_of_years - @staticmethod - def generate_list_of_instances(list_of_times, in_gridname, in_instance): + def generate_list_of_instances(self, list_of_times, in_gridname, in_instance): """ This function matches every entry in the list_of_times with an associated instance. The structure of this code is such that, in the future, we may have scenarios where @@ -732,6 +735,8 @@ def generate_list_of_instances(list_of_times, in_gridname, in_instance): Returns: instance_list: ordered list of instances to use for building a set of new urls. 
""" + self.logger.debug('list_of_times: %s, in_gridname: %s, in_instance: %s', list_of_times, in_gridname, in_instance) + num_entries = len(list_of_times) # gridname = in_gridname # Get default values @@ -741,32 +746,6 @@ def generate_list_of_instances(list_of_times, in_gridname, in_instance): return instance_list - - # Expect this to be part of a looped list of times from which appending will be applied - def construct_url_from_yaml(self, config, intime, instance, ensemble, gridname, hurricane_yaml_year=None, hurricane_yaml_source=None): - """ - Given a single time (%Y%m%d%H) or advisory, the gridname, instance, and ensemble values - use the entries in config to build a proper URL - If applying to Hurricanes, we need to also applyld_url_list_from_yaml_and_timest the values for hurricane_yaml_year, and - hurricane_yaml_source - """ - # hurricane_yaml_source is a special case scenario - if self.is_hurricane(intime): - self.logger.debug('Request for YAML build of Hurricane URL. subdir is %s', hurricane_yaml_source) - intime = str(intime) - subdir = hurricane_yaml_year # This is certainly NOT generalized - source = hurricane_yaml_source - else: - subdir = dt.datetime.strptime(intime, '%Y%m%d%H').year - source = 'nam' - - cfg = config['ADCIRC'] - url = cfg["baseurl"] + cfg["dodsCpart"] % ( - subdir, source, intime, cfg["AdcircGrid"] % (gridname), cfg["Machine"], cfg["Instance"] % (instance), cfg["Ensemble"] % (ensemble), - cfg["fortNumber"]) - - return url - def construct_start_time_from_offset(self, stop_time, n_days): """ Construct an appropriate start_time given the stop_time and offset.