diff --git a/.github/workflows/tests_and_checks.yml b/.github/workflows/tests_and_checks.yml
index db9ad27..00efc27 100644
--- a/.github/workflows/tests_and_checks.yml
+++ b/.github/workflows/tests_and_checks.yml
@@ -21,7 +21,7 @@ jobs:
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
     - name: Test with pytest
       run: |
-        coverage run -m pytest --datetime-increments-limit=150 -m "not completion"
+        coverage run -m pytest --datetime-increments-limit=150 --number-of-dfs-to-stop-at=15 -m "not completion"
     - name: Generate coverage report
       run: |
         coverage report -m
diff --git a/MISOReports/MISOReports.py b/MISOReports/MISOReports.py
index 2ea2a52..c36e6f5 100644
--- a/MISOReports/MISOReports.py
+++ b/MISOReports/MISOReports.py
@@ -20,8 +20,10 @@ def __init__(
     ):
         """Constructor for Data class.
 
-        :param pd.DataFrame df: The tabular data from the report.
-        :param requests.Response response: The response from the download.
+        :param pd.DataFrame df: The tabular data from
+            the report.
+        :param requests.Response response: The response
+            from the download.
         """
         self.df = df
         self.response = response
@@ -41,9 +43,12 @@ def __init__(
     ):
         """Constructor for URLBuilder class.
 
-        :param str target: A string to be used in the URL to identify the report.
-        :param list[str] supported_extensions: The different file types available for download.
-        :param str | None default_extension: The default file type to download, defaults to None
+        :param str target: A string to be used in
+            the URL to identify the report.
+        :param list[str] supported_extensions: The
+            different file types available for download.
+        :param str | None default_extension: The default
+            file type to download, defaults to None
         """
         self.target = target
         self.supported_extensions = supported_extensions
@@ -57,8 +62,10 @@ def build_url(
     ) -> str:
         """Builds the URL to download from.
 
-        :param str | None file_extension: The file type to download. If None, the default extension is used.
-        :param datetime.datetime | None ddatetime: The datetime to download the report for.
+        :param str | None file_extension: The file type
+            to download. If None, the default extension is used.
+        :param datetime.datetime | None ddatetime: The datetime
+            to download the report for.
         :return str: A URL to download the report from.
         """
         pass
@@ -67,14 +74,18 @@ def _build_url_extension_check(
         self,
         file_extension: str | None,
     ) -> str:
-        """Checks the file extension and returns it if it is supported.
+        """Checks the file extension and returns it if it
+        is supported.
 
-        :param str | None file_extension: The file extension to check. If None, the default extension is used.
+        :param str | None file_extension: The file extension
+            to check. If None, the default extension is used.
         :return str: The file extension if it is supported.
         """
         if file_extension is None:
             if self.default_extension is None:
-                raise ValueError("No file extension provided and no default extension set.")
+                raise ValueError(
+                    "No file extension provided and no default extension set."
+                )
 
             file_extension = self.default_extension
 
@@ -88,11 +99,13 @@ def add_to_datetime(
         ddatetime: datetime.datetime | None,
         direction: int,
     ) -> datetime.datetime | None:
-        """Changes the datetime by one unit in the direction specified according to URL generator if this
-        URL builder uses it, otherwise leaves it unchanged.
+        """Changes the datetime by one unit in the direction specified
+        according to URL generator if this URL builder uses it, otherwise
+        leaves it unchanged.
 
         :param datetime.datetime | None ddatetime: The datetime to change.
-        :param int direction: The multiple for the increment (negative for backwards increment).
+        :param int direction: The multiple for the increment (negative
+            for backwards increment).
         :return datetime.datetime: The new datetime.
         """
         return ddatetime
@@ -119,7 +132,6 @@ def build_url(
         ddatetime: datetime.datetime | None = None,
     ) -> str:
         file_extension = self._build_url_extension_check(file_extension)
-
         res = self._format_url.replace(URLBuilder.extension_placeholder, file_extension)
 
         return res
@@ -145,7 +157,6 @@ def build_url(
         ddatetime: datetime.datetime | None = None,
     ) -> str:
         file_extension = self._build_url_extension_check(file_extension)
-
         res = self._format_url.replace(URLBuilder.extension_placeholder, file_extension)
 
         return res
@@ -161,9 +172,12 @@ def __init__(
         """Constructor for MISOMarketReportsURLBuilder class.
 
         :param str target: The target of the URL.
-        :param list[str] supported_extensions: The supported extensions for the URL.
-        :param Callable[[datetime.datetime | None, str], str] url_generator: The function to generate the URL.
-        :param str | None default_extension: The default file type to download, defaults to None
+        :param list[str] supported_extensions: The supported
+            extensions for the URL.
+        :param Callable[[datetime.datetime | None, str], str] url_generator:
+            The function to generate the URL.
+        :param str | None default_extension:
+            The default file type to download, defaults to None
         """
         super().__init__(
             target=target,
@@ -194,7 +208,8 @@ def add_to_datetime(
         in the direction specified.
 
         :param datetime.datetime | None ddatetime: The datetime to change.
-        :param int direction: The multiple for the increment (negative for backwards increment).
+        :param int direction: The multiple for the increment (negative
+            for backwards increment).
         :return datetime.datetime: The new datetime.
         """
         default_increment_mappings: dict[Callable[[datetime.datetime | None, str], str], relativedelta] = {
@@ -214,7 +229,7 @@ def add_to_datetime(
         self.increment_mappings.update(default_increment_mappings)
 
         if self.url_generator not in self.increment_mappings.keys():
-            raise ValueError("This URL generator has no mapped increment.")
+            raise ValueError("This URL generator has no mapped increment.")
 
         if ddatetime is None:
             return None
@@ -360,11 +375,15 @@ def __init__(
     ):
         """Constructor for Report class.
 
-        :param URLBuilder url_builder: The URL builder to be used for the report.
-        :param str type_to_parse: The type of the file to pass as input into the parser.
-        :param Callable[[requests.Response], pd.DataFrame] parser: The parser for the report.
+        :param URLBuilder url_builder: The URL builder to be
+            used for the report.
+        :param str type_to_parse: The type of the file to pass
+            as input into the parser.
+        :param Callable[[requests.Response], pd.DataFrame] parser:
+            The parser for the report.
         :param str example_url: An example URL for the report.
-        :param datetime.datetime | None example_datetime: An example datetime for the report (this should match the example_url).
+        :param datetime.datetime | None example_datetime: An example
+            datetime for the report (this should match the example_url).
         """
         self.url_builder = url_builder
        self.type_to_parse = type_to_parse
@@ -386,7 +405,8 @@ def get_url(
 
         :param str report_name: The name of the report.
         :param str file_extension: The type of file to download.
-        :param datetime.datetime | None ddatetime: The date of the report, defaults to None
+        :param datetime.datetime | None ddatetime: The date
+            of the report, defaults to None
         :return str: The URL to download the report from.
         """
         if report_name not in MISOReports.report_mappings:
@@ -412,7 +432,8 @@ def get_response(
 
         :param str report_name: The name of the report.
         :param str file_extension: The type of file to download.
-        :param datetime.datetime | None ddatetime: The date of the report, defaults to None
+        :param datetime.datetime | None ddatetime: The date of the report,
+            defaults to None
         :param int | None timeout: The timeout for the request, defaults to None
         :return requests.Response: The response object for the request.
         """
@@ -437,7 +458,8 @@ def _get_response_helper(
         """Helper function to get the response in the report download.
 
         :param str url: The URL to download the report from.
-        :param int | None timeout: The timeout limit for the request, defaults to None
+        :param int | None timeout: The timeout limit for the request,
+            defaults to None
         :return requests.Response: The response object for the request.
         """
         res = requests.get(
@@ -460,7 +482,8 @@ def get_df(
 
         :param str report_name: The name of the report.
         :param str | None url: A url to download directly from, defaults to None
-        :param datetime.datetime | None ddatetime: The date of the report, defaults to None
+        :param datetime.datetime | None ddatetime: The date of the report,
+            defaults to None
         :param int | None timeout: The timeout for the request, defaults to None
         :return pd.DataFrame: A DataFrame containing the data of the report.
         """
@@ -484,7 +507,8 @@ def get_data(
 
         :param str report_name: The name of the report.
         :param str | None url: The url to download from, defaults to None
-        :param datetime.datetime | None ddatetime: The target datetime to download the report for, defaults to None
+        :param datetime.datetime | None ddatetime: The target datetime to
+            download the report for, defaults to None
         :param int | None timeout: The timeout for the request, defaults to None
         :return Data: An object containing the DataFrame and the response.
         """
@@ -518,12 +542,14 @@ def add_to_datetime(
         ddatetime: datetime.datetime | None,
         direction: int,
     ) -> datetime.datetime | None:
-        """Changes the datetime by one unit in the direction specified according to the report
-        if this report allows for target dates, otherwise leaves it unchanged.
+        """Changes the datetime by one unit in the direction
+        specified according to the report if this report allows for
+        target dates, otherwise leaves it unchanged.
 
         :param str report_name: The name of the report.
         :param datetime.datetime | None ddatetime: The datetime to add to.
-        :param int direction: The multiple for the increment (negative for backwards increment).
+        :param int direction: The multiple for the increment (negative
+            for backwards increment).
         :return datetime.datetime: The new datetime.
         """
         if report_name not in MISOReports.report_mappings:
@@ -640,7 +666,7 @@ def add_to_datetime(
             ),
             type_to_parse="xls",
             parser=parsers.parse_da_bc,
-            example_url="https://docs.misoenergy.org/marketreports/20220101_da_bc.xls",
+            example_url="https://docs.misoenergy.org/marketreports/20240101_da_bc.xls",
             example_datetime=datetime.datetime(year=2024, month=1, day=1),
         ),
 
@@ -774,7 +800,7 @@ def add_to_datetime(
             example_datetime=datetime.datetime(year=2022, month=1, day=1),
         ),
 
-        "ms_vlr_srw": Report( # TODO need to update because rows change and so second df gets moved down.
+ "ms_vlr_srw": Report( # Checked 2024-12-21 url_builder=MISOMarketReportsURLBuilder( target="ms_vlr_srw", supported_extensions=["xlsx"], @@ -1082,7 +1108,7 @@ def add_to_datetime( url_builder=MISOMarketReportsURLBuilder( target="DA_LMPs", supported_extensions=["zip"], - url_generator=MISOMarketReportsURLBuilder.url_generator_YYYY_current_month_name_to_two_months_later_name_first, + url_generator=MISOMarketReportsURLBuilder.url_generator_YYYY_underscore_current_month_name_to_two_months_later_name_first, default_extension="zip", ), type_to_parse="zip", @@ -1395,7 +1421,7 @@ def add_to_datetime( example_datetime=datetime.datetime(year=2024, month=4, day=1), ), - "ftr_annual_results_round_1": Report( # TODO review reworked implementation. + "ftr_annual_results_round_1": Report( # Checked 2024-12-21. url_builder=MISOMarketReportsURLBuilder( target="ftr_annual_results_round_1", supported_extensions=["zip"], @@ -1408,7 +1434,7 @@ def add_to_datetime( example_datetime=datetime.datetime(year=2022, month=4, day=1), ), - "ftr_annual_results_round_2": Report( # TODO review reworked implementation. + "ftr_annual_results_round_2": Report( # Checked 2024-12-21. url_builder=MISOMarketReportsURLBuilder( target="ftr_annual_results_round_2", supported_extensions=["zip"], @@ -1421,7 +1447,7 @@ def add_to_datetime( example_datetime=datetime.datetime(year=2022, month=1, day=1), ), - "ftr_annual_results_round_3": Report( # TODO review reworked implementation. + "ftr_annual_results_round_3": Report( # Checked 2024-12-21. url_builder=MISOMarketReportsURLBuilder( target="ftr_annual_results_round_3", supported_extensions=["zip"], @@ -1434,7 +1460,7 @@ def add_to_datetime( example_datetime=datetime.datetime(year=2022, month=1, day=1), ), - "ftr_annual_bids_offers": Report( # TODO review reworked implementation. + "ftr_annual_bids_offers": Report( # Checked 2024-12-21. url_builder=MISOMarketReportsURLBuilder( target="ftr_annual_bids_offers", supported_extensions=["zip"], @@ -1447,7 +1473,7 @@ def add_to_datetime( example_datetime=datetime.datetime(year=2024, month=1, day=1), ), - "ftr_mpma_results": Report( # TODO review reworked implementation. + "ftr_mpma_results": Report( # Checked 2024-12-21 url_builder=MISOMarketReportsURLBuilder( target="ftr_mpma_results", supported_extensions=["zip"], @@ -1782,7 +1808,7 @@ def add_to_datetime( ), type_to_parse="zip", parser=parsers.parse_da_co, - example_url="https://docs.misoenergy.org/marketreports/20240501_da_rpe.xls", + example_url="https://docs.misoenergy.org/marketreports/20240501_da_co.zip", example_datetime=datetime.datetime(year=2024, month=5, day=1), ), diff --git a/MISOReports/parsers.py b/MISOReports/parsers.py index 3c8749c..b4d637a 100644 --- a/MISOReports/parsers.py +++ b/MISOReports/parsers.py @@ -1,3 +1,4 @@ +import warnings import os import datetime from collections import defaultdict @@ -537,40 +538,61 @@ def parse_Total_Uplift_by_Resource( def parse_ms_vlr_srw( res: requests.Response, ) -> pd.DataFrame: - float_columns = ["DA VLR RSG MWP", "RT VLR RSG MWP", "DA+RT Total"] - string_columns = ["Constraint"] - column_names = string_columns + float_columns - - df1 = pd.read_excel( - io=io.BytesIO(res.content), - skiprows=7, - nrows=3, - usecols=column_names, + warnings.warn( + "This report is unpredictable in the number of tables " + + "it contains and the number of rows in each table. " + + "This parser may break in the next report." 
     )
-    df1[float_columns] = df1[float_columns].astype("Float64")
-    df1[string_columns] = df1[string_columns].astype("string")
 
-    df2 = pd.read_excel(
-        io=io.BytesIO(res.content),
-        skiprows=23,
-        nrows=5,
-        usecols=column_names,
+    def get_single_table(
+        res: requests.Response,
+        skiprows: int,
+    ) -> pd.DataFrame:
+        float_columns = ["DA VLR RSG MWP", "RT VLR RSG MWP", "DA+RT Total"]
+        string_columns = ["Constraint"]
+        column_names = string_columns + float_columns
+
+        # The number of rows in each table can change.
+        # The tables end on the row where the "Constraint" column is "Total".
+        df0 = pd.read_excel(
+            io=io.BytesIO(res.content),
+            skiprows=skiprows,
+            usecols=column_names,
+        )
+
+        df0_constraint = df0[df0["Constraint"] == "Total"]
+        first_total_idx = df0_constraint.index.min()
+
+        res = df0.iloc[:first_total_idx + 1, :].copy()
+
+        res[float_columns] = res[float_columns].astype("Float64")
+        res[string_columns] = res[string_columns].astype("string")
+
+        return res
+
+    df1 = get_single_table(res=res, skiprows=7)
+
+    df2 = get_single_table(res=res, skiprows=7 + df1.shape[0] + 5)
+
+    df3 = get_single_table(
+        res=res,
+        skiprows=7 + df1.shape[0] + 5 + df2.shape[0] + 5,
     )
-    df2[float_columns] = df2[float_columns].astype("Float64")
-    df2[string_columns] = df2[string_columns].astype("string")
 
-    df = pd.DataFrame({
+    res = pd.DataFrame({
         MULTI_DF_NAMES_COLUMN: [
             "Central",
+            "North",
             "South",
         ],
         MULTI_DF_DFS_COLUMN: [
             df1,
             df2,
+            df3,
         ],
     })
 
-    return df
+    return res
 
 
 def parse_ms_rsg_srw(
@@ -1247,14 +1269,18 @@ def parse_DA_LMPs(
     with zipfile.ZipFile(file=io.BytesIO(res.content)) as z:
         text = z.read(z.namelist()[0]).decode("utf-8")
 
-    csv_data = "\n".join(text.splitlines()[1:])
+    csv_data = "\n".join(text.lstrip().splitlines())
 
     df = pd.read_csv(
         filepath_or_buffer=io.StringIO(csv_data),
     )
 
     df[["MARKET_DAY"]] = df[["MARKET_DAY"]].apply(pd.to_datetime, format="%m/%d/%Y")
-    df[["HE1", "HE2", "HE3", "HE4", "HE5", "HE6", "HE7", "HE8", "HE9", "HE10", "HE11", "HE12", "HE13", "HE14", "HE15", "HE16", "HE17", "HE18", "HE19", "HE20", "HE21", "HE22", "HE23", "HE24"]] = df[["HE1", "HE2", "HE3", "HE4", "HE5", "HE6", "HE7", "HE8", "HE9", "HE10", "HE11", "HE12", "HE13", "HE14", "HE15", "HE16", "HE17", "HE18", "HE19", "HE20", "HE21", "HE22", "HE23", "HE24"]].astype("Float64")
+
+    float_columns = ["HE1", "HE2", "HE3", "HE4", "HE5", "HE6", "HE7", "HE8", "HE9", "HE10", "HE11", "HE12", "HE13", "HE14", "HE15", "HE16", "HE17", "HE18", "HE19", "HE20", "HE21", "HE22", "HE23", "HE24"]
+    df[float_columns] = df[float_columns].astype("string")
+    df[float_columns] = df[float_columns].apply(lambda x: x.str.replace(',', ''))
+    df[float_columns] = df[float_columns].astype("Float64")
     df[["NODE", "TYPE", "VALUE"]] = df[["NODE", "TYPE", "VALUE"]].astype("string")
 
     return df
diff --git a/MISOReports/test_MISOReports.py b/MISOReports/test_MISOReports.py
index ffd4227..50e8b8e 100644
--- a/MISOReports/test_MISOReports.py
+++ b/MISOReports/test_MISOReports.py
@@ -19,27 +19,46 @@
 )
 
 
+"""
+The main tests for MISOReports. Most of the tests are for ensuring
+that the column names and column types of the dfs have their respective
+expected values. There are currently no tests against the actual
+source values of the reports.
+"""
+
+
 def try_to_get_dfs(
     report_name: str,
     datetime_increment_limit: int,
     number_of_dfs_to_stop_at: int,
 ) -> tuple[list[pd.DataFrame], list[datetime.datetime]]:
-    """Tries to get the df for the report_name and returns it with its respective
-    target datetime. If a request fails, it will increment the datetime and try
-    again up to datetime_increment_limit times.
-
-    :param str report_name: The name of the report to get the df for.
-    :param int datetime_increment_limit: The number of times to try to get the df before raising an error.
-    :param int number_of_dfs_to_stop_at: The number of successfully downloaded dfs to stop at.
-    :return tuple[list[pd.DataFrame], list[datetime.datetime]]: The dfs and the target dates they were downloaded for.
+    """Tries to get the df for report_name and returns
+    it with its respective target datetime. If a request
+    fails, it will increment the datetime and try again
+    up to datetime_increment_limit times. The starting
+    datetime used is the example_datetime for the report.
+    If not a single df is successfully returned after
+    datetime_increment_limit increments, a ValueError is
+    raised.
+
+    :param str report_name: The name of the report to get
+        the df for.
+    :param int datetime_increment_limit: The number of times
+        to try to get the df.
+    :param int number_of_dfs_to_stop_at: The number of
+        successfully downloaded dfs to stop at.
+    :return tuple[list[pd.DataFrame], list[datetime.datetime]]:
+        The dfs and the target dates they were downloaded for.
     """
     report_mappings = MISOReports.report_mappings
     report = report_mappings[report_name]
 
-    increment_cnt = 0
-    curr_target_datetime = report.example_datetime
     dfs = []
     target_datetimes = []
+
+    increment_cnt = 0
+    curr_target_datetime = report.example_datetime
+    url_builder = report.url_builder
 
     while increment_cnt <= datetime_increment_limit:
         try:
             df = MISOReports.get_df(
@@ -54,13 +73,13 @@ def try_to_get_dfs(
             if len(dfs) >= number_of_dfs_to_stop_at:
                 break
 
-            curr_target_datetime = report.url_builder.add_to_datetime(
+            curr_target_datetime = url_builder.add_to_datetime(
                 ddatetime=curr_target_datetime,
                 direction=1,
             )
             increment_cnt += 1
         except requests.HTTPError as e:
-            curr_target_datetime = report.url_builder.add_to_datetime(
+            curr_target_datetime = url_builder.add_to_datetime(
                 ddatetime=curr_target_datetime,
                 direction=1,
             )
@@ -68,9 +87,17 @@ def try_to_get_dfs(
 
     if increment_cnt > datetime_increment_limit:
         if len(dfs) == 0:
-            raise ValueError(f"Failed to get a df after {datetime_increment_limit} datetime increments (last target datetime tried: {curr_target_datetime}).")
+            raise ValueError(
+                f"Failed to get a df after {datetime_increment_limit} " +
+                f"datetime increments (last target datetime tried: " +
+                f"{curr_target_datetime})."
+            )
         else:
-            warnings.warn(f"Only got {len(dfs)}/{number_of_dfs_to_stop_at} dfs after {datetime_increment_limit} attempts (last target datetime tried: {curr_target_datetime}).")
+            warnings.warn(
+                f"Only got {len(dfs)}/{number_of_dfs_to_stop_at} dfs " +
+                f"after {datetime_increment_limit} attempts (last target " +
+                f"datetime tried: {curr_target_datetime})."
+            )
 
     return dfs, target_datetimes
@@ -83,9 +110,12 @@ def uses_correct_dtypes(
     """Checks if the columns in the df have the correct dtypes.
 
     :param pd.DataFrame df: The df to check the dtypes of.
-    :param list[str] columns: The columns to check the dtypes of.
-    :param Callable[[object], bool] dtype_checker: The function to check the dtypes with.
-    :return bool: True if the columns have the correct dtypes, False otherwise.
+    :param list[str] columns: The columns to check the dtypes of
+        in the df.
+    :param Callable[[object], bool] dtype_checker: The function
+        to check the dtypes with.
+    :return bool: True if the columns have the correct dtypes,
+        False otherwise.
""" for column in columns: if not dtype_checker(df[column]): @@ -96,6 +126,8 @@ def uses_correct_dtypes( @pytest.fixture def get_df_test_names(): + """Returns the names of the reports to test get_df for. + """ single_df_tests = [v[0] for v in single_df_test_list] multiple_dfs_tests = [v[0] for v in multiple_dfs_test_list] nsi_tests = nsi_test_list @@ -121,6 +153,25 @@ def number_of_dfs_to_stop_at(request): return request.config.getoption("--number-of-dfs-to-stop-at") +def test_MISOMarketReports_report_example_url_matches_example_datetime(): + report_mappings = MISOReports.report_mappings + for report_name, report in report_mappings.items(): + if type(report.url_builder) is not MISOMarketReportsURLBuilder: + continue + + url_builder = report.url_builder + example_datetime = report.example_datetime + example_url = report.example_url + + generated_url = url_builder.build_url( + ddatetime=example_datetime, + file_extension=report.type_to_parse, + ) + + assert generated_url == example_url, \ + f"{report_name}: {generated_url} != {example_url}" + + def test_MISOMarketReportsURLBuilder_add_to_datetime_has_an_increment_mapping_for_all_url_generators(): url_generators = [] for func_str in dir(MISOMarketReportsURLBuilder): @@ -1456,6 +1507,10 @@ def test_get_df_single_df_correct_columns(report_name, columns_mapping, datetime ("DA VLR RSG MWP", "RT VLR RSG MWP", "DA+RT Total",): pd.api.types.is_float_dtype, ("Constraint",): pd.api.types.is_string_dtype, }, + "North": { + ("DA VLR RSG MWP", "RT VLR RSG MWP", "DA+RT Total",): pd.api.types.is_float_dtype, + ("Constraint",): pd.api.types.is_string_dtype, + }, "South": { ("DA VLR RSG MWP", "RT VLR RSG MWP", "DA+RT Total",): pd.api.types.is_float_dtype, ("Constraint",): pd.api.types.is_string_dtype, @@ -1984,7 +2039,7 @@ def test_MISOMarketReports_add_to_datetime( ) assert new_datetime == expected, f"Expected {expected}, got {new_datetime}." - + nsi_test_list = [ "nsi1",