From 863b756b149d1a62fb9339d46bbb3b92a73060d6 Mon Sep 17 00:00:00 2001 From: Oisin Date: Wed, 16 Oct 2024 10:38:29 +0100 Subject: [PATCH 1/7] Ignoring cleaned_data directory --- .dockerignore | 1 + .gitignore | 1 + 2 files changed, 2 insertions(+) diff --git a/.dockerignore b/.dockerignore index 5bf512a..4591f18 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,5 +1,6 @@ data/Met_Eireann/arch data/Met_Eireann/scraped_data +data/Met_Eireann/cleaned_data *__pycache__ *.ipynb_checkpoints *.xlsx diff --git a/.gitignore b/.gitignore index e78ee81..57a5a11 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # ignore sub repos data/Met_Eireann/arch data/Met_Eireann/scraped_data +data/Met_Eireann/cleaned_data *__pycache__ *.ipynb_checkpoints *.xlsx From 3ec524c3381477d2f3ae9a1f11c56928a6a26503 Mon Sep 17 00:00:00 2001 From: Oisin Date: Wed, 16 Oct 2024 10:39:20 +0100 Subject: [PATCH 2/7] Addded s3 constants. Credentials, cleaned and scraped data directories. --- webscraper/cons.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/webscraper/cons.py b/webscraper/cons.py index 546bdfb..dc5e3e9 100644 --- a/webscraper/cons.py +++ b/webscraper/cons.py @@ -10,6 +10,7 @@ sys.path.append(root_dir) # set directories data_dir = os.path.join(root_dir, 'data') +creds_data = os.path.join(root_dir, '.creds') gis_dir = os.path.join(data_dir, "gis") met_eireann_dir = os.path.join(data_dir, 'Met_Eireann') bokeh_ref_data_dir = os.path.join(data_dir, "bokeh", "ref") @@ -23,11 +24,13 @@ map_data_fpath = os.path.join(gis_dir, "map_data.pickle") points_data_fpath = os.path.join(gis_dir, "points_data.pickle") scraped_data_dir = os.path.join(met_eireann_dir, 'scraped_data') +cleaned_data_dir = os.path.join(met_eireann_dir, 'cleaned_data') stations_fpath = os.path.join(met_eireann_dir, 'ref', 'StationDetails.csv') unittest_normal_dists_fpath = os.path.join(bokeh_ref_data_dir, "unittest_normal_dists.json") col_options_fpath = os.path.join(bokeh_ref_data_dir, "col_options.json") stat_options_fpath = os.path.join(bokeh_ref_data_dir, "stat_options.json") agg_level_strftime_fpath = os.path.join(bokeh_ref_data_dir, "agg_level_strftime.json") +session_token_fpath = os.path.join(creds_data, "sessionToken.json") # load bokeh reference data with open(col_options_fpath) as json_file: @@ -35,4 +38,10 @@ with open(stat_options_fpath) as json_file: stat_options = json.load(json_file) with open(agg_level_strftime_fpath) as json_file: - date_strftime_dict = json.load(json_file) \ No newline at end of file + date_strftime_dict = json.load(json_file) + +# aws s3 constants +s3_bucket = "irishclimatedashboard" +s3_scraped_directory = "data/Met_Eireann/scraped_data" +s3_clean_directory = "data/Met_Eireann/cleaned_data" +s3_fname = "dly{station_id}.csv" \ No newline at end of file From 073f5aa35c2e3ed0ba23a43b4eb70e8f89875d2b Mon Sep 17 00:00:00 2001 From: Oisin Date: Wed, 16 Oct 2024 10:40:15 +0100 Subject: [PATCH 3/7] Converted / created a Boto3 s3 client object class --- webscraper/arch/gen_boto3_excel.py | 55 ------------------- webscraper/utilities/S3Client.py | 84 ++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 55 deletions(-) delete mode 100644 webscraper/arch/gen_boto3_excel.py create mode 100644 webscraper/utilities/S3Client.py diff --git a/webscraper/arch/gen_boto3_excel.py b/webscraper/arch/gen_boto3_excel.py deleted file mode 100644 index 3c5b74d..0000000 --- a/webscraper/arch/gen_boto3_excel.py +++ /dev/null @@ -1,55 +0,0 @@ -import io -import boto3 -import json -from beartype 
import beartype
-
-@beartype
-def gen_boto3_excel(
-    sessionToken:str,
-    bucket:str="irishclimateapp",
-    prefix:str="data/Met_Eireann"
-    ) -> list:
-    """Retrieves the raw Met Eireann data from AWS s3
-
-    Parameters
-    ----------
-    sessionToken : str
-        The file path to an active aws session token
-    bucket : str
-        The s3 bucket containing the Met Eireann data files
-    prefix : str
-        The s3 directory containing the Met Eireann data files
-
-    Returns
-    -------
-    list
-        The raw Met Eireann data
-    """
-    # load aws config
-    with open(sessionToken, "r") as j:
-        aws_config = json.loads(j.read())
-    # connect to aws boto3
-    session = boto3.Session(
-        aws_access_key_id=aws_config['Credentials']["AccessKeyId"],
-        aws_secret_access_key=aws_config['Credentials']["SecretAccessKey"],
-        aws_session_token=aws_config['Credentials']["SessionToken"],
-        region_name="eu-west-1"
-    )
-    # generate boto3 s3 connection
-    client = session.client("s3")
-    # create a paginator to list all objects
-    paginator = client.get_paginator("list_objects_v2")
-    # apply the paginator to list all files in the irishclimateapp bucket with key data/Met_Eireann
-    operation_parameters = {"Bucket": bucket, "Prefix": prefix}
-    page_iterator = paginator.paginate(**operation_parameters)
-    # filter down contents keys with .xlsx
-    filtered_iterator = page_iterator.search("Contents[?contains(Key,'.xlsx')].Key")
-    # extract out the file keys
-    file_keys = [content_key for content_key in filtered_iterator]
-    # load s3 objects into list
-    objs_list = [
-        client.get_object(Bucket=bucket, Key=file_key) for file_key in file_keys
-    ]
-    # decode xlsx files in body
-    data_list = [io.BytesIO(obj["Body"].read()) for obj in objs_list]
-    return data_list
diff --git a/webscraper/utilities/S3Client.py b/webscraper/utilities/S3Client.py
new file mode 100644
index 0000000..5202865
--- /dev/null
+++ b/webscraper/utilities/S3Client.py
@@ -0,0 +1,84 @@
+import io
+import boto3
+import json
+import logging
+import pandas as pd
+from typing import Union
+from beartype import beartype
+
+class S3Client():
+
+    @beartype
+    def __init__(self, sessionToken:str):
+        # load aws config
+        with open(sessionToken, "r") as j:
+            aws_config = json.loads(j.read())
+        # connect to aws boto3
+        self.session = boto3.Session(
+            aws_access_key_id=aws_config['Credentials']["AccessKeyId"],
+            aws_secret_access_key=aws_config['Credentials']["SecretAccessKey"],
+            aws_session_token=aws_config['Credentials']["SessionToken"],
+            region_name="eu-west-1"
+        )
+        # generate boto3 s3 connection
+        self.client = self.session.client("s3")
+
+    @beartype
+    def store(
+        self,
+        data:pd.DataFrame,
+        key:str,
+        bucket:str="irishclimateapp"
+        ):
+        """Stores a Met Eireann data file on s3 as a .csv object.
+
+        Parameters
+        ----------
+        data : pd.DataFrame
+            The Met Eireann data to store on s3
+        key : str
+            The s3 key to store the Met Eireann data file under
+        bucket : str
+            The s3 bucket storing the Met Eireann data files
+
+        Returns
+        -------
+        """
+        try:
+            logging.info(f"Storing data to S3://{bucket}/{key}")
+            csv_buf = io.StringIO()
+            data.to_csv(csv_buf, header=True, index=False)
+            csv_buf.seek(0)
+            self.client.put_object(Bucket=bucket, Body=csv_buf.getvalue(), Key=key)
+        except Exception as e:
+            logging.info(str(e))
+
+    @beartype
+    def retrieve(
+        self,
+        key:str,
+        bucket:str="irishclimateapp"
+        ):
+        """Retrieves a Met Eireann data file from AWS s3.
+
+        Parameters
+        ----------
+        key : str
+            The s3 key containing the Met Eireann data file
+        bucket : str
+            The s3 bucket containing the Met Eireann data file
+
+        Returns
+        -------
+        pd.DataFrame
+            The retrieved Met Eireann data, or None if retrieval failed
+        """
+        data = None
+        try:
+            logging.info(f"Retrieving data from S3://{bucket}/{key}")
+            # load the s3 object
+            obj = self.client.get_object(Bucket=bucket, Key=key)
+            # read the .csv body into a dataframe
+            data = pd.read_csv(obj["Body"])
+        except Exception as e:
+            logging.info(str(e))
+        return data
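For reference, a minimal usage sketch of the new S3Client wrapper (illustrative, not part of the commit): it assumes a valid AWS session token JSON exists at .creds/sessionToken.json, that the target bucket already exists, and uses a made-up station id (532) purely for the key name; the bucket and key values mirror the constants added to cons.py.

    import pandas as pd
    from utilities.S3Client import S3Client

    # build a client from a local AWS session token file (mirrors cons.session_token_fpath)
    client = S3Client(sessionToken=".creds/sessionToken.json")
    # store a small cleaned frame and read it straight back from the backup bucket
    frame = pd.DataFrame({"date": ["2020-01-01"], "maxtp": [11.2]})
    client.store(data=frame, key="data/Met_Eireann/cleaned_data/dly532.csv", bucket="irishclimatedashboard")
    roundtrip = client.retrieve(key="data/Met_Eireann/cleaned_data/dly532.csv", bucket="irishclimatedashboard")
    if roundtrip is not None:
        print(roundtrip.head())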
From 0e6e6486730e0cdf9e0f17e26d9e765c813a6eb8 Mon Sep 17 00:00:00 2001
From: Oisin
Date: Wed, 16 Oct 2024 10:40:54 +0100
Subject: [PATCH 4/7] Extracted out cleaning logic to a separate script. Added additional writing of cleaned files to disk and s3

---
 webscraper/utilities/clean_data.py      | 41 +++++++++++++++++++++++++
 webscraper/utilities/gen_master_data.py |  3 +-
 2 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100644 webscraper/utilities/clean_data.py

diff --git a/webscraper/utilities/clean_data.py b/webscraper/utilities/clean_data.py
new file mode 100644
index 0000000..e805660
--- /dev/null
+++ b/webscraper/utilities/clean_data.py
@@ -0,0 +1,41 @@
+import logging
+import os
+from beartype import beartype
+from typing import Union
+import cons
+from webscraper.utilities.load_data import load_data
+from utilities.S3Client import S3Client
+
+@beartype
+def clean_data(
+    scraped_data_dir:str=cons.scraped_data_dir,
+    cleaned_data_dir:str=cons.cleaned_data_dir,
+    store_on_s3:bool=False
+    ):
+    """Generates the master data from the individual raw Met Eireann .xlsx files
+
+    Parameters
+    ----------
+    scraped_data_dir : str
+        The local directory to load the raw Met Eireann .csv files from
+    cleaned_data_dir : str
+        The local directory to write the cleaned Met Eireann .csv files to
+
+    Returns
+    -------
+    """
+    # load data files from file directory
+    scraped_data_fpaths = [os.path.join(scraped_data_dir, fname) for fname in os.listdir(scraped_data_dir)]
+    logging.info("Reading, cleaning and storing files ...")
+    s3client = S3Client(sessionToken=cons.session_token_fpath)
+    for fpath in scraped_data_fpaths:
+        # extract basename
+        fname = os.path.basename(fpath)
+        # load data
+        clean_data = load_data(fpath)
+        # write data to clean data directory
+        cleaned_data_fpath = os.path.join(cleaned_data_dir, fname)
+        clean_data.to_csv(cleaned_data_fpath, header=True, index=False)
+        if store_on_s3:
+            # store data on s3 back up repository
+            s3client.store(data=clean_data, bucket=cons.s3_bucket, key=f"{cons.s3_clean_directory}/{fname}")
\ No newline at end of file
diff --git a/webscraper/utilities/gen_master_data.py b/webscraper/utilities/gen_master_data.py
index 02081a9..5fe25ab 100644
--- a/webscraper/utilities/gen_master_data.py
+++ b/webscraper/utilities/gen_master_data.py
@@ -4,7 +4,6 @@
 import cons
 from beartype import beartype
 from typing import Union
-from webscraper.utilities.load_data import load_data
 
 @beartype
 def gen_master_data(
@@ -30,7 +29,7 @@ def gen_master_data(
     met_eireann_fpaths = [os.path.join(cons.scraped_data_dir, fname) for fname in os.listdir(cons.scraped_data_dir)]
     logging.info("Reading, concatenating and cleaning .xlsx files ...")
     # load and concatenate data files together
-    data_list = [load_data(fpath) for fpath in met_eireann_fpaths]
+    data_list = [pd.read_csv(fpath) for fpath in met_eireann_fpaths]
     data = pd.concat(objs=data_list, ignore_index=True, axis=0)
     # order results by county, id and date alphabetically
     data = data.sort_values(by=["county", "id", 
"date"]).reset_index(drop=True) From 352cc25db3b58d8cfe08f88e8c24db212c6f03df Mon Sep 17 00:00:00 2001 From: Oisin Date: Wed, 16 Oct 2024 10:41:47 +0100 Subject: [PATCH 5/7] Calling cleaning function --- webscraper/prg_webscrape_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/webscraper/prg_webscrape_data.py b/webscraper/prg_webscrape_data.py index a5c9848..1f0b317 100644 --- a/webscraper/prg_webscrape_data.py +++ b/webscraper/prg_webscrape_data.py @@ -9,6 +9,7 @@ from utilities.gen_preaggregate_data import gen_preaggregate_data from utilities.gen_counties_data import gen_counties_data from utilities.gen_stations_data import gen_stations_data +from utilities.clean_data import clean_data @beartype def webscrape_data( @@ -44,6 +45,8 @@ def webscrape_data( stations = load_stations_data(stations_fpath=cons.stations_fpath, filter_open=True) # run webscraper resp_log = retrieve_station_data(stations=stations, scraped_data_dir=cons.scraped_data_dir, data_level="dly") + # run data cleaning + clean_data(scraped_data_dir=cons.scraped_data_dir, cleaned_data_dir=cons.cleaned_data_dir) if generate_master_data: logging.info('~~~~~ Generating master data file ...') # generate master data file From 49702370ec2076323fd2c124f9386526cd281072 Mon Sep 17 00:00:00 2001 From: Oisin Date: Thu, 17 Oct 2024 08:48:46 +0100 Subject: [PATCH 6/7] #27 Revised webscraping pipeline. Added missing doc strings and updated beartyping. Combined scripts within common subprocesses. Updated programme parameters in .bat and .py files. Added missing default cons values. --- exeWebscrapeData.bat | 2 +- webscraper/prg_webscrape_data.py | 110 ++++++++++++------ webscraper/utilities/clean_data.py | 41 ------- webscraper/utilities/commandline_interface.py | 28 ++--- .../{load_data.py => gen_clean_data.py} | 48 +++++++- .../{gen_counties_data.py => gen_map_data.py} | 50 ++++---- webscraper/utilities/gen_master_data.py | 37 +++--- webscraper/utilities/gen_met_data.py | 82 +++++++++++++ ...en_stations_data.py => gen_points_data.py} | 36 +++--- ...reaggregate_data.py => gen_preagg_data.py} | 35 +++--- webscraper/utilities/load_stations_data.py | 35 ------ webscraper/utilities/retrieve_station_data.py | 36 ------ webscraper/utilities/url_retrieve.py | 34 ------ 13 files changed, 290 insertions(+), 284 deletions(-) delete mode 100644 webscraper/utilities/clean_data.py rename webscraper/utilities/{load_data.py => gen_clean_data.py} (56%) rename webscraper/utilities/{gen_counties_data.py => gen_map_data.py} (60%) create mode 100644 webscraper/utilities/gen_met_data.py rename webscraper/utilities/{gen_stations_data.py => gen_points_data.py} (54%) rename webscraper/utilities/{gen_preaggregate_data.py => gen_preagg_data.py} (54%) delete mode 100644 webscraper/utilities/load_stations_data.py delete mode 100644 webscraper/utilities/retrieve_station_data.py delete mode 100644 webscraper/utilities/url_retrieve.py diff --git a/exeWebscrapeData.bat b/exeWebscrapeData.bat index 4e1eb1e..d51547f 100644 --- a/exeWebscrapeData.bat +++ b/exeWebscrapeData.bat @@ -1 +1 @@ -call python webscraper\prg_webscrape_data.py --retrieve_data --generate_master_data --generate_preaggregated_data --generate_counties_data --generate_stations_data \ No newline at end of file +call python webscraper\prg_webscrape_data.py --run_met_data --run_clean_data --run_master_data --run_preagg_data --run_map_data --run_points_data \ No newline at end of file diff --git a/webscraper/prg_webscrape_data.py b/webscraper/prg_webscrape_data.py index 1f0b317..35703e2 100644 --- 
a/webscraper/prg_webscrape_data.py +++ b/webscraper/prg_webscrape_data.py @@ -3,35 +3,37 @@ import time from beartype import beartype from utilities.commandline_interface import commandline_interface -from utilities.load_stations_data import load_stations_data -from utilities.retrieve_station_data import retrieve_station_data +from utilities.gen_met_data import gen_met_data +from utilities.gen_clean_data import gen_clean_data from utilities.gen_master_data import gen_master_data -from utilities.gen_preaggregate_data import gen_preaggregate_data -from utilities.gen_counties_data import gen_counties_data -from utilities.gen_stations_data import gen_stations_data -from utilities.clean_data import clean_data +from utilities.gen_preagg_data import gen_preagg_data +from utilities.gen_map_data import gen_map_data +from utilities.gen_points_data import gen_points_data @beartype def webscrape_data( - retrieve_data:bool, - generate_master_data:bool, - generate_preaggregated_data:bool, - generate_counties_data:bool, - generate_stations_data:bool + run_met_data:bool, + run_clean_data:bool, + run_master_data:bool, + run_preagg_data:bool, + run_map_data:bool, + run_points_data:bool ): """Webscrape and process met data into dashboard files Parameters ---------- - retrieve_data : bool + run_met_data : bool Retrieves / web scrapes the historical met data - generate_master_data : bool + run_clean_data : bool + Cleans and processes the scraped met data + run_master_data : bool Generates the master data file from the retrieved / web scraped met data files - generate_preaggregated_data : bool + run_preagg_data : bool Preaggreates the master data file into various date levels for the bokeh dashboard app - generate_counties_data : bool - Generates the counties gis file for the bokeh dashboard app - generate_stations_data : bool + run_map_data : bool + Generates the map gis file for the bokeh dashboard app + run_points_data : bool Generates the stations gis file for the bokeh dashboard app Returns @@ -39,30 +41,61 @@ def webscrape_data( """ # start timer t0 = time.time() - if retrieve_data: + + if run_met_data: logging.info('~~~~~ Retrieving data for met stations ...') - # load stations data - stations = load_stations_data(stations_fpath=cons.stations_fpath, filter_open=True) # run webscraper - resp_log = retrieve_station_data(stations=stations, scraped_data_dir=cons.scraped_data_dir, data_level="dly") + gen_met_data( + stations_fpath=cons.stations_fpath, + filter_open=True, + topn_stations=5, + scraped_data_dir=cons.scraped_data_dir, data_level="dly" + ) + + if run_clean_data: + logging.info('~~~~~ Cleaning met stations data ...') # run data cleaning - clean_data(scraped_data_dir=cons.scraped_data_dir, cleaned_data_dir=cons.cleaned_data_dir) - if generate_master_data: + gen_clean_data( + scraped_data_dir=cons.scraped_data_dir, + cleaned_data_dir=cons.cleaned_data_dir, + store_on_s3=False + ) + + if run_master_data: logging.info('~~~~~ Generating master data file ...') # generate master data file - gen_master_data(master_data_fpath = cons.master_data_fpath) - if generate_preaggregated_data: + gen_master_data( + cleaned_data_dir=cons.cleaned_data_dir, + master_data_fpath=cons.master_data_fpath + ) + + if run_preagg_data: logging.info('~~~~~ Generating preaggregated data file ...') # generate the preaggregate data - gen_preaggregate_data(preaggregate_data_fpath = cons.preaggregate_data_fpath) - if generate_counties_data: - logging.info('~~~~~ Generating geospatial counties data file ...') + gen_preagg_data( + 
master_data_fpath=cons.master_data_fpath, + preaggregate_data_fpath=cons.preaggregate_data_fpath + ) + + if run_map_data: + logging.info('~~~~~ Generating geospatial map data file ...') # generate counties data - gen_counties_data(map_data_fpath = cons.map_data_fpath) - if generate_stations_data: - logging.info('~~~~~ Generating geospatial stations data file ...') + gen_map_data( + rep_counties_fpath=cons.rep_counties_fpath, + ni_counties_fpath=cons.ni_counties_fpath, + preaggregate_data_fpath=cons.preaggregate_data_fpath, + map_data_fpath=cons.map_data_fpath + ) + + if run_points_data: + logging.info('~~~~~ Generating geospatial points data file ...') # generate wheather station points data - gen_stations_data(points_data_fpath = cons.points_data_fpath) + gen_points_data( + master_data_fpath=cons.master_data_fpath, + stations_fpath=cons.stations_fpath, + points_data_fpath=cons.points_data_fpath + ) + # end timer and log result t1 = time.time() tres = t1 - t0 @@ -74,13 +107,16 @@ def webscrape_data( # set up logging lgr = logging.getLogger() lgr.setLevel(logging.INFO) + # handle input parameters input_params_dict = commandline_interface() + # call webscrape data webscrape_data( - retrieve_data=input_params_dict['retrieve_data'], - generate_master_data=input_params_dict['generate_master_data'], - generate_preaggregated_data=input_params_dict['generate_preaggregated_data'], - generate_counties_data=input_params_dict['generate_counties_data'], - generate_stations_data=input_params_dict['generate_stations_data'] + run_met_data=input_params_dict['run_met_data'], + run_clean_data=input_params_dict['run_clean_data'], + run_master_data=input_params_dict['run_master_data'], + run_preagg_data=input_params_dict['run_preagg_data'], + run_map_data=input_params_dict['run_map_data'], + run_points_data=input_params_dict['run_points_data'] ) \ No newline at end of file diff --git a/webscraper/utilities/clean_data.py b/webscraper/utilities/clean_data.py deleted file mode 100644 index e805660..0000000 --- a/webscraper/utilities/clean_data.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging -import os -from beartype import beartype -from typing import Union -import cons -from webscraper.utilities.load_data import load_data -from utilities.S3Client import S3Client - -@beartype -def clean_data( - scraped_data_dir:str=cons.scraped_data_dir, - cleaned_data_dir:str=cons.cleaned_data_dir, - store_on_s3:bool=False - ): - """Generates the master data from the individual raw Met Eireann .xlsx files - - Parameters - ---------- - scraped_data_dir : str - The local directory to load the raw Met Eireann .csv files from - cleaned_data_dir : str - The local directory to write the cleaned Met Eireann .csv files to - - Returns - ------- - """ - # load data files from file directory - scraped_data_fpaths = [os.path.join(scraped_data_dir, fname) for fname in os.listdir(scraped_data_dir)] - logging.info("Reading, cleaning and storing files ...") - s3client = S3Client(sessionToken=cons.session_token_fpath) - for fpath in scraped_data_fpaths: - # extract basename - fname = os.path.basename(fpath) - # load data - clean_data = load_data(fpath) - # write data to clean data directory - cleaned_data_fpath = os.path.join(cleaned_data_dir, fname) - clean_data.to_csv(cleaned_data_fpath, header=True, index=False) - if store_on_s3: - # store data on s3 back up repository - s3client.store(data=clean_data, bucket=cons.s3_bucket, key=f"{cons.s3_clean_directory}/{fname}") \ No newline at end of file diff --git 
a/webscraper/utilities/commandline_interface.py b/webscraper/utilities/commandline_interface.py
index b9efe59..f1cc605 100644
--- a/webscraper/utilities/commandline_interface.py
+++ b/webscraper/utilities/commandline_interface.py
@@ -4,12 +4,6 @@ def commandline_interface():
     """A commandline interface for parsing input parameters with
 
-    Windows
-        python IrishClimateDashboard\\webscraper\\prg_webscraper_data.py --retrieve_data --generate_master_data --generate_preaggregated_data --generate_counties_data --generate_stations_data
-
-    Linux
-        python3 IrishClimateDashboard/webscraper/prg_webscraper_data.py --retrieve_data --generate_master_data --generate_preaggregated_data --generate_counties_data --generate_stations_data
-
     Parameters
     ----------
@@ -21,19 +15,21 @@
     # define argument parser object
     parser = argparse.ArgumentParser(description="Execute Random TeleCom Data Programme.")
     # add input arguments
-    parser.add_argument("--retrieve_data", action=argparse.BooleanOptionalAction, dest="retrieve_data", type=bool, default=False, help="Boolean, retrieves / web scrapes the historical met data",)
-    parser.add_argument("--generate_master_data", action=argparse.BooleanOptionalAction, dest="generate_master_data", type=bool, default=False, help="Boolean, generates the master data file from the retrieved / web scraped met data files",)
-    parser.add_argument("--generate_preaggregated_data", action=argparse.BooleanOptionalAction, dest="generate_preaggregated_data", type=bool, default=False, help="Boolean, preaggreates the master data file into various date levels for the bokeh dashboard app",)
-    parser.add_argument("--generate_counties_data", action=argparse.BooleanOptionalAction, dest="generate_counties_data", type=bool, default=False, help="Boolean, generates the counties gis file for the bokeh dashboard app",)
-    parser.add_argument("--generate_stations_data", action=argparse.BooleanOptionalAction, dest="generate_stations_data", type=bool, default=False, help="Boolean, generates the stations gis file for the bokeh dashboard app",)
+    parser.add_argument("--run_met_data", action=argparse.BooleanOptionalAction, dest="run_met_data", type=bool, default=False, help="Boolean, retrieves / web scrapes the historical met data",)
+    parser.add_argument("--run_clean_data", action=argparse.BooleanOptionalAction, dest="run_clean_data", type=bool, default=False, help="Boolean, cleans and processes the scraped met data",)
+    parser.add_argument("--run_master_data", action=argparse.BooleanOptionalAction, dest="run_master_data", type=bool, default=False, help="Boolean, generates the master data file from the retrieved / web scraped met data files",)
+    parser.add_argument("--run_preagg_data", action=argparse.BooleanOptionalAction, dest="run_preagg_data", type=bool, default=False, help="Boolean, preaggregates the master data file into various date levels for the bokeh dashboard app",)
+    parser.add_argument("--run_map_data", action=argparse.BooleanOptionalAction, dest="run_map_data", type=bool, default=False, help="Boolean, generates the map gis file for the bokeh dashboard app",)
+    parser.add_argument("--run_points_data", action=argparse.BooleanOptionalAction, dest="run_points_data", type=bool, default=False, help="Boolean, generates the stations gis file for the bokeh dashboard app",)
     # create an output dictionary to hold the results
     input_params_dict = {}
     # extract input arguments
     args = parser.parse_args()
     # map input arguments into output dictionary
-    input_params_dict["retrieve_data"] = args.retrieve_data
-    input_params_dict["generate_master_data"] = args.generate_master_data
-    input_params_dict["generate_preaggregated_data"] = args.generate_preaggregated_data
-    input_params_dict["generate_counties_data"] = args.generate_counties_data
-    input_params_dict["generate_stations_data"] = args.generate_stations_data
+    input_params_dict["run_met_data"] = args.run_met_data
+    input_params_dict["run_clean_data"] = args.run_clean_data
+    input_params_dict["run_master_data"] = args.run_master_data
+    input_params_dict["run_preagg_data"] = args.run_preagg_data
+    input_params_dict["run_map_data"] = args.run_map_data
+    input_params_dict["run_points_data"] = args.run_points_data
     return input_params_dict
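As a usage note (illustrative, not part of the commit): the renamed flags are independent argparse.BooleanOptionalAction booleans that default to False, so individual stages can be rerun on their own as well as end to end. The command lines below mirror the updated exeWebscrapeData.bat:

    # full pipeline, as wired up in exeWebscrapeData.bat
    python webscraper/prg_webscrape_data.py --run_met_data --run_clean_data --run_master_data --run_preagg_data --run_map_data --run_points_data
    # rebuild only the dashboard artefacts from previously scraped and cleaned data
    python webscraper/prg_webscrape_data.py --run_master_data --run_preagg_data --run_map_data --run_points_data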
diff --git a/webscraper/utilities/load_data.py b/webscraper/utilities/gen_clean_data.py
similarity index 56%
rename from webscraper/utilities/load_data.py
rename to webscraper/utilities/gen_clean_data.py
index 0b84b68..0a9296a 100644
--- a/webscraper/utilities/load_data.py
+++ b/webscraper/utilities/gen_clean_data.py
@@ -1,9 +1,10 @@
-
-import re
+import logging
 import os
+import re
 import pandas as pd
-import cons
 from beartype import beartype
+import cons
+from utilities.S3Client import S3Client
 
 @beartype
 def load_data(
@@ -17,8 +18,7 @@
     fpath : str
         The file path to load the webscraped met data from disk
     stations_fpath : str
-        The file path to load the reference station data from disk
-
+        The file path to load the reference station data from disk, default is cons.stations_fpath
     Returns
     -------
@@ -56,3 +56,41 @@
     dataframe["county"] = dataframe["county"].str.title()
     dataframe["date"] = pd.to_datetime(dataframe["date"], format='%d-%b-%Y')
     return dataframe
+
+
+@beartype
+def gen_clean_data(
+    scraped_data_dir:str=cons.scraped_data_dir,
+    cleaned_data_dir:str=cons.cleaned_data_dir,
+    store_on_s3:bool=False
+    ):
+    """Cleans the individual raw Met Eireann .csv files and writes the results to the cleaned data directory
+
+    Parameters
+    ----------
+    scraped_data_dir : str
+        The local directory to load the raw Met Eireann .csv files from, default is cons.scraped_data_dir
+    cleaned_data_dir : str
+        The local directory to write the cleaned Met Eireann .csv files to, default is cons.cleaned_data_dir
+    store_on_s3 : bool
+        Whether to back up the cleaned data files on s3, default is False
+
+    Returns
+    -------
+    """
+    # load data files from file directory
+    scraped_data_fpaths = [os.path.join(scraped_data_dir, fname) for fname in os.listdir(scraped_data_dir)]
+    logging.info("Reading, cleaning and storing files ...")
+    s3client = S3Client(sessionToken=cons.session_token_fpath)
+    for fpath in scraped_data_fpaths:
+        # extract basename
+        fname = os.path.basename(fpath)
+        # load data
+        clean_data = load_data(fpath)
+        # write data to clean data directory
+        cleaned_data_fpath = os.path.join(cleaned_data_dir, fname)
+        logging.info(f"Writing cleaned data file {cleaned_data_fpath} to disk")
+        clean_data.to_csv(cleaned_data_fpath, header=True, index=False)
+        if store_on_s3:
+            # store data on s3 back up repository
+            s3client.store(data=clean_data, bucket=cons.s3_bucket, key=f"{cons.s3_clean_directory}/{fname}")
\ No newline at end of file
diff --git a/webscraper/utilities/gen_counties_data.py b/webscraper/utilities/gen_map_data.py
similarity index 60%
rename from webscraper/utilities/gen_counties_data.py
rename to webscraper/utilities/gen_map_data.py
index 631acc4..e7bc019 100644
--- a/webscraper/utilities/gen_counties_data.py
+++ b/webscraper/utilities/gen_map_data.py
@@ -8,31 +8,36 @@
 from typing import Union
 @beartype
-def gen_counties_data(
-    pre_agg_data_dict:Union[dict,None]=None,
-    map_data_fpath:Union[str,None]=None,
+def gen_map_data(
+    rep_counties_fpath:str=cons.rep_counties_fpath,
+    ni_counties_fpath:str=cons.ni_counties_fpath,
+    preaggregate_data_fpath:str=cons.preaggregate_data_fpath,
+    map_data_fpath:str=cons.map_data_fpath
     ):
     """Generates counties map data for the bokeh map dashboard
 
     Parameters
    ----------
-    pre_agg_data_dict : None or dict
-        Either the preaggregated data dictionary or loads the preaggregated data dictionary from disk when None, default is None
-    map_data_fpath : None or str
-        The file location to write the map data to disk, default is None
+    rep_counties_fpath : str
+        The file path to the Republic of Ireland counties .shp file on disk, default is cons.rep_counties_fpath
+    ni_counties_fpath : str
+        The file path to the Northern Ireland counties .shp file on disk, default is cons.ni_counties_fpath
+    preaggregate_data_fpath : str
+        The file path to the preaggregated data on disk, default is cons.preaggregate_data_fpath
+    map_data_fpath : str
+        The file location to write the map data to disk, default is cons.map_data_fpath
 
     Returns
     -------
     """
     logging.info("Loading rep / ni counties shape files ...")
     # load in county shape files
-    rep_counties = (gpd.read_file(cons.rep_counties_fpath)[["ENGLISH", "geometry"]].rename(columns={"ENGLISH": "county"}).to_crs(epsg=2157))
-    ni_counties = gpd.read_file(cons.ni_counties_fpath)[["county", "geometry"]].to_crs(epsg=2157)
-    if type(pre_agg_data_dict) == type(None):
-        logging.info("Loading preaggregated data dictionary ...")
-        # load preaggregated data
-        with open(cons.preaggregate_data_fpath, "rb") as f:
-            pre_agg_data_dict = pickle.load(f)
+    rep_counties = (gpd.read_file(rep_counties_fpath)[["ENGLISH", "geometry"]].rename(columns={"ENGLISH": "county"}).to_crs(epsg=2157))
+    ni_counties = gpd.read_file(ni_counties_fpath)[["county", "geometry"]].to_crs(epsg=2157)
+    logging.info("Loading preaggregated data dictionary ...")
+    # load preaggregated data
+    with open(preaggregate_data_fpath, "rb") as f:
+        pre_agg_data_dict = pickle.load(f)
     logging.info("Concatenating counties geopandas dataframes ...")
     # concatenate county shape files
     counties = gpd.GeoDataFrame(pd.concat([rep_counties, ni_counties], ignore_index=True), crs="EPSG:2157")
@@ -61,19 +66,16 @@
         county_data = pre_agg_data.groupby(group_cols, as_index=False).agg(agg_dict)
         county_data['stat'] = stat
         map_data_list.append(county_data)
-    #
     map_data = pd.concat(objs=map_data_list,axis=0,ignore_index=True)
     # join county level data to map data
     map_geodata = gpd.GeoDataFrame(
         data=pd.merge(left=counties, right=map_data, on="county", how="left"),
         crs="EPSG:2157",
     )
-    # if the output
-    if map_data_fpath != None:
-        if os.path.exists(map_data_fpath):
-            logging.info("Writing counties data to disk as pickle file ...")
-            # pickle the preaggregated data dictionary to disk
-            with open(map_data_fpath, "wb") as f:
-                pickle.dump(map_geodata, f, protocol=pickle.HIGHEST_PROTOCOL)
-        else:
-            raise ValueError(f"{map_data_fpath} does not exist")
+    if os.path.exists(map_data_fpath):
+        logging.info("Writing counties data to disk as pickle file ...")
+        # pickle the map geodata to disk
+        with open(map_data_fpath, "wb") as f:
+            pickle.dump(map_geodata, f, protocol=pickle.HIGHEST_PROTOCOL)
+    else:
+        raise ValueError(f"{map_data_fpath} does not exist")
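A quick illustration of how the regenerated map artefact can be inspected (a sketch, not part of the patch; it assumes geopandas is installed, since the pickle holds a GeoDataFrame, and uses the default path from cons.map_data_fpath):

    import pickle

    # load the pickled GeoDataFrame produced by gen_map_data
    with open("data/gis/map_data.pickle", "rb") as f:
        map_geodata = pickle.load(f)
    # one row per county per statistic; the geometry column carries the county polygons
    print(map_geodata[["county", "stat"]].drop_duplicates().head())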
diff --git a/webscraper/utilities/gen_master_data.py b/webscraper/utilities/gen_master_data.py
index 5fe25ab..6caef31 100644
--- a/webscraper/utilities/gen_master_data.py
+++ b/webscraper/utilities/gen_master_data.py
@@ -7,37 +7,36 @@
 @beartype
 def gen_master_data(
-    met_eireann_fpaths:Union[list,None]=None,
-    master_data_fpath:Union[str,None]=None,
+    cleaned_data_dir:str=cons.cleaned_data_dir,
+    master_data_fpath:str=cons.master_data_fpath,
     ):
     """Generates the master data from the individual raw Met Eireann .xlsx files
 
     Parameters
     ----------
-    met_eireann_fpaths : None or list
-        The raw Met Eireann .xlsx file paths, default is None
-    master_data_fpath : None or str
-        The file location to write the master data to disk, default is None
+    cleaned_data_dir : str
+        The local directory containing the cleaned Met Eireann .csv files, default is cons.cleaned_data_dir
+    master_data_fpath : str
+        The file location to write the master data to disk, default is cons.master_data_fpath
 
     Returns
     -------
     """
-    # if load data locally
-    if met_eireann_fpaths == None:
-        logging.info("Retrieving raw met eireann .xlsx file paths from disk ...")
-        # load data files from file directory
-        met_eireann_fpaths = [os.path.join(cons.scraped_data_dir, fname) for fname in os.listdir(cons.scraped_data_dir)]
-    logging.info("Reading, concatenating and cleaning .xlsx files ...")
+    logging.info("Retrieving cleaned file paths from disk ...")
+    # load data files from file directory
+    met_eireann_fpaths = [os.path.join(cleaned_data_dir, fname) for fname in os.listdir(cleaned_data_dir)]
+    logging.info("Reading and concatenating files ...")
     # load and concatenate data files together
     data_list = [pd.read_csv(fpath) for fpath in met_eireann_fpaths]
     data = pd.concat(objs=data_list, ignore_index=True, axis=0)
+    # convert date to datetime
+    data["date"] = pd.to_datetime(data["date"], format="%Y-%m-%d")
     # order results by county, id and date alphabetically
     data = data.sort_values(by=["county", "id", "date"]).reset_index(drop=True)
     # if the output
-    if master_data_fpath != None:
-        if os.path.exists(master_data_fpath):
-            logging.info("Writing master file to disk as .feather file ...")
-            # save concatenated data to disk
-            data.to_feather(master_data_fpath)
-        else:
-            raise ValueError(f"{master_data_fpath} does not exist")
\ No newline at end of file
+    if os.path.exists(master_data_fpath):
+        logging.info("Writing master file to disk as .feather file ...")
+        # save concatenated data to disk
+        data.to_feather(master_data_fpath)
+    else:
+        raise ValueError(f"{master_data_fpath} does not exist")
\ No newline at end of file
diff --git a/webscraper/utilities/gen_met_data.py b/webscraper/utilities/gen_met_data.py
new file mode 100644
index 0000000..a0c8fa0
--- /dev/null
+++ b/webscraper/utilities/gen_met_data.py
@@ -0,0 +1,82 @@
+import logging
+import os
+import pandas as pd
+import urllib.request
+from beartype import beartype
+from typing import Union
+import cons
+
+@beartype
+def url_retrieve(
+    stationid:int,
+    scraped_data_dir:str=cons.scraped_data_dir,
+    data_level:str="dly"
+    ):
+    """Retrieves met data for a given station id
+
+    Parameters
+    ----------
+    stationid : int
+        The station id to retrieve data for
+    scraped_data_dir : str
+        The file directory to write the scraped met data to, default is cons.scraped_data_dir
+    data_level : str
+        The time level of the met data to scrape, default is "dly"
+
+    Returns
+    -------
+    urllib.request.urlretrieve, Exception
+        A retrieval response
+    """
+    data_fname = f"{data_level}{stationid}.csv"
+    data_url = f"http://cli.fusio.net/cli/climate_data/webdata/{data_fname}"
+    download_data_fpath = os.path.join(scraped_data_dir, data_fname)
+    try:
+        resp = 
urllib.request.urlretrieve(data_url, download_data_fpath) + except Exception as e: + resp = e + return resp + +@beartype +def gen_met_data( + stations_fpath:str=cons.stations_fpath, + filter_open:bool=True, + topn_stations:Union[int, None]=None, + scraped_data_dir:str=cons.scraped_data_dir, + data_level:str="dly" + ): + """Webscrapes the met data for all station ids in a given stations dataframe + + Parameters + ---------- + stations_fpath : pd.DataFrame + The file path to the met eireann stations reference data, default is cons.stations_fpath + filter_open : bool + Whether to only filter for only open weather stations in the met eireann stations reference data, default is True + topn_stations : int + The number of stations to sample from the head of the met eireann stations reference data, default is None + scraped_data_dir : str + The file directory to write the scraped met data to, default is cons.scraped_data_dir + data_level : str + The time level of the met data to scrape, default is "dly" + + + Returns + ------- + """ + # load stations data + stations = pd.read_csv(stations_fpath) + if filter_open: + # only consider open stations for now + open_stations_filter = stations['close_year'].isnull() + stations = stations.loc[open_stations_filter, :].reset_index(drop=True) + if topn_stations != None: + stations = stations.head(topn_stations) + # iterate over each station and pull daily level data using using stationid + resp_log =[] + for idx, row in stations.iterrows(): + logging.info(f"{idx} {row['county']} {row['station_id']} {row['name']}") + resp = url_retrieve(stationid=row['station_id'], scraped_data_dir=scraped_data_dir, data_level=data_level) + logging.info(resp) + resp_log.append(resp) + \ No newline at end of file diff --git a/webscraper/utilities/gen_stations_data.py b/webscraper/utilities/gen_points_data.py similarity index 54% rename from webscraper/utilities/gen_stations_data.py rename to webscraper/utilities/gen_points_data.py index 8f1cf5b..dc4b211 100644 --- a/webscraper/utilities/gen_stations_data.py +++ b/webscraper/utilities/gen_points_data.py @@ -8,31 +8,35 @@ from typing import Union @beartype -def gen_stations_data( - points_data_fpath:Union[str,None]=None +def gen_points_data( + master_data_fpath:str=cons.master_data_fpath, + stations_fpath:str=cons.stations_fpath, + points_data_fpath:str=cons.points_data_fpath ): """Generates gis points data for Met Eireann stations Parameters ---------- + master_data_fpath : str + The file path to the master data on disk, default is cons.master_data_fpath + station_fpath : str + The file path to the stations reference data on disk, default is cons.stations_fpath points_data_fpath : str - The file location to write the gis points data to disk, default is None + The file location to write the gis points data to disk, default is cons.points_data_fpath Returns ------- """ logging.info("Loading master and stations data from disk ...") # load master and station data - master_data = pd.read_feather(cons.master_data_fpath) - stations_data = pd.read_csv(cons.stations_fpath) + master_data = pd.read_feather(master_data_fpath) + stations_data = pd.read_csv(stations_fpath) logging.info("Identifying master station ids ...") # extract out station ids from mater file master_station_ids = master_data["id"].unique() logging.info("Filtering corresponding station data ...") # filter master data with station ids - master_stations = stations_data.loc[ - stations_data["station_id"].isin(master_station_ids), : - ].copy() + master_stations = 
stations_data.loc[stations_data["station_id"].isin(master_station_ids), :].copy() master_stations["county"] = master_stations["county"].str.title() master_stations["name"] = master_stations["name"].str.title() logging.info("Creating geopandas DataFrame of station data ...") @@ -42,12 +46,10 @@ def gen_stations_data( geometry=gpd.points_from_xy(master_stations.longitude, master_stations.latitude), crs="EPSG:4326", ).to_crs(epsg=2157) - # if the output - if points_data_fpath != None: - if os.path.exists(points_data_fpath): - logging.info("Writing gis stations data to disk as .pickle file ...") - # pickle the gis stations data - with open(points_data_fpath, "wb") as f: - pickle.dump(geo_master_stations, f, protocol=pickle.HIGHEST_PROTOCOL) - else: - raise ValueError(f"{points_data_fpath} does not exist") + if os.path.exists(points_data_fpath): + logging.info("Writing gis stations data to disk as .pickle file ...") + # pickle the gis stations data + with open(points_data_fpath, "wb") as f: + pickle.dump(geo_master_stations, f, protocol=pickle.HIGHEST_PROTOCOL) + else: + raise ValueError(f"{points_data_fpath} does not exist") diff --git a/webscraper/utilities/gen_preaggregate_data.py b/webscraper/utilities/gen_preagg_data.py similarity index 54% rename from webscraper/utilities/gen_preaggregate_data.py rename to webscraper/utilities/gen_preagg_data.py index ef04595..5088b23 100644 --- a/webscraper/utilities/gen_preaggregate_data.py +++ b/webscraper/utilities/gen_preagg_data.py @@ -7,26 +7,25 @@ from typing import Union @beartype -def gen_preaggregate_data( - master_data:Union[pd.DataFrame,None]=None, - preaggregate_data_fpath:Union[str,None]=None +def gen_preagg_data( + master_data_fpath:str=cons.master_data_fpath, + preaggregate_data_fpath:str=cons.preaggregate_data_fpath ): """Generates preaggregate data for bokeh dashboard app Parameters ---------- - master_data : None or pd.DataFrame - Either the master data as a pandas.DataFrame or loads the master data from disk when None, default is None + master_data_fpath : None or pd.DataFrame + The file location to write the master data to disk, default is cons.master_data_fpath preaggregate_data_fpath : str - The file location to write the preaggregated data to disk, default is None + The file location to write the preaggregated data to disk, default is cons.preaggregate_data_fpath Returns ------- """ - if type(master_data) == type(None): - logging.info("Loading master data from disk ...") - # load master data - master_data = pd.read_feather(cons.master_data_fpath) + logging.info("Loading master data from disk ...") + # load master data + master_data = pd.read_feather(master_data_fpath) logging.info("Performing initial data aggregation to year-month level ...") # preaggregate the data to year-month level for each available stat pre_agg_data_dict = {} @@ -41,12 +40,10 @@ def gen_preaggregate_data( agg_dict = {col: stat for col in cons.col_options} tmp_agg_data = agg_data.groupby(group_cols, as_index=False).agg(agg_dict) pre_agg_data_dict[stat] = tmp_agg_data - # if the output - if preaggregate_data_fpath != None: - if os.path.exists(preaggregate_data_fpath): - logging.info("Writing preaggregated data to disk as .pickle file ...") - # pickle the preaggregated data dictionary to disk - with open(cons.preaggregate_data_fpath, "wb") as f: - pickle.dump(pre_agg_data_dict, f, protocol=pickle.HIGHEST_PROTOCOL) - else: - raise ValueError(f"{preaggregate_data_fpath} does not exist") + if os.path.exists(preaggregate_data_fpath): + logging.info("Writing 
preaggregated data to disk as .pickle file ...") + # pickle the preaggregated data dictionary to disk + with open(cons.preaggregate_data_fpath, "wb") as f: + pickle.dump(pre_agg_data_dict, f, protocol=pickle.HIGHEST_PROTOCOL) + else: + raise ValueError(f"{preaggregate_data_fpath} does not exist") diff --git a/webscraper/utilities/load_stations_data.py b/webscraper/utilities/load_stations_data.py deleted file mode 100644 index 0bf0d82..0000000 --- a/webscraper/utilities/load_stations_data.py +++ /dev/null @@ -1,35 +0,0 @@ -import pandas as pd -from beartype import beartype - -@beartype -def load_stations_data( - stations_fpath:str, - filter_open:bool=True, - topn:int=None - ) -> pd.DataFrame: - """Loads the station reference data file - - Parameters - ---------- - stations_fpath : str - The file path to load the reference station data from disk - filter_open : bool - Whether to only consider open stations and not closed stations - topn : int - The number of rows to filter from the head of the loaded stations data - - - Returns - ------- - pd.DataFrame - The loaded stations reference data - """ - # load stations data - stations = pd.read_csv(stations_fpath) - if filter_open: - # only consider open stations for now - open_stations_filter = stations['close_year'].isnull() - stations = stations.loc[open_stations_filter, :].reset_index(drop=True) - if topn != None: - stations = stations.head(topn) - return stations \ No newline at end of file diff --git a/webscraper/utilities/retrieve_station_data.py b/webscraper/utilities/retrieve_station_data.py deleted file mode 100644 index 87d2c0c..0000000 --- a/webscraper/utilities/retrieve_station_data.py +++ /dev/null @@ -1,36 +0,0 @@ -import logging -import pandas as pd -from utilities.url_retrieve import url_retrieve -from beartype import beartype - -@beartype -def retrieve_station_data( - stations:pd.DataFrame, - scraped_data_dir:str, - data_level:str="dly" - ) -> list: - """Webscrapes the met data for all station ids in a given stations dataframe - - Parameters - ---------- - stations : pd.DataFrame - The loaded reference stations data - scraped_data_dir : str - The file directory to write the scraped met data to - data_level : str - The time level of the met data to scrape, default is "dly" - - - Returns - ------- - list - A log of the webscrape responses - """ - # iterate over each station and pull daily level data using using stationid - resp_log =[] - for idx, row in stations.iterrows(): - logging.info(f"{idx} {row['county']} {row['station_id']} {row['name']}") - resp = url_retrieve(stationid=row['station_id'], scraped_data_dir=scraped_data_dir, data_level=data_level) - logging.info(resp) - resp_log.append(resp) - return resp_log \ No newline at end of file diff --git a/webscraper/utilities/url_retrieve.py b/webscraper/utilities/url_retrieve.py deleted file mode 100644 index b1de553..0000000 --- a/webscraper/utilities/url_retrieve.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -import urllib.request -from beartype import beartype - -@beartype -def url_retrieve( - stationid:int, - scraped_data_dir:str, - data_level:str="dly" - ): - """Retrieves met data for a given station id - - Parameters - ---------- - stationid : int - The station id to retrieve data for - scraped_data_dir : str - The file directory to write the scraped met data to - data_level : str - The time level of the met data to scrape, default is "dly" - - Returns - ------- - urllib.request.urlretrieve, Exception - A retrieval response - """ - data_fname = f"{data_level}{stationid}.csv" 
-    data_url = f"http://cli.fusio.net/cli/climate_data/webdata/{data_fname}"
-    download_data_fpath = os.path.join(scraped_data_dir, data_fname)
-    try:
-        resp = urllib.request.urlretrieve(data_url, download_data_fpath)
-    except Exception as e:
-        resp = e
-    return resp
\ No newline at end of file

From 7b753a61bb3dabda241ee9dcdc951b701d0686e9 Mon Sep 17 00:00:00 2001
From: Oisin
Date: Thu, 17 Oct 2024 08:49:09 +0100
Subject: [PATCH 7/7] Recast date to datetime

---
 data/master.feather | Bin 33814562 -> 33737714 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/data/master.feather b/data/master.feather
index 8a15ed4583c62a59e608df77c35d1989068d7435..0c403c95a75b6e5536273848c48b26c465a77311 100644
GIT binary patch
[binary deltas (5346 and 6184 bytes) for data/master.feather omitted]
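After the final patch, a quick way to sanity check the rebuilt master file (an illustrative sketch; data/master.feather is the file touched by patch 7 and is assumed to match cons.master_data_fpath):

    import pandas as pd

    # confirm the recast date column survived the feather round trip as a datetime dtype
    master = pd.read_feather("data/master.feather")
    print(master["date"].dtype)                      # expected: datetime64[ns]
    print(master[["county", "id", "date"]].head())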