Skip to content

Commit

Permalink
Updated get_df tests to print the target date if the test fails as well.
Browse files Browse the repository at this point in the history
  • Loading branch information
BrianWeiHaoMa committed Dec 15, 2024
1 parent da12cfd commit 1e97126
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 44 deletions.
8 changes: 4 additions & 4 deletions DOCUMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ This is the documentation for MISOReports.

## Data Types
All dataframe columns are categorized into one of the following data types:
* **pandas.core.arrays.string_.StringDtype()** ex. "Toronto"
* **numpy.dtypes.DateTime64DType()** ex. "2024-02-02 08:24:36 PM" or "2024-02-02 16:24:36" or "2024-01-03" or "13:05:00" etc.
* **numpy.dtypes.Float64DType()** ex. 34.13
* **pandas.core.arrays.integer.Int64Dtype()** ex. 34
* **string** ex. "Toronto".
* **datetime64\[ns\]** ex. "2024-02-02 08:24:36 PM" or "2024-02-02 16:24:36" or "2024-01-03" or "13:05:00" etc.
* **Float64** ex. 34.13.
* **Int64** ex. 34.

## Supported Reports
Here are the supported reports along with corresponding example URLs. If the report offers multiple formats,
Expand Down
12 changes: 6 additions & 6 deletions MISOReports/MISOReports.py
Original file line number Diff line number Diff line change
Expand Up @@ -1443,8 +1443,8 @@ def add_to_datetime(
),
type_to_parse="zip",
parser=parsers.parse_ftr_annual_bids_offers,
example_url="https://docs.misoenergy.org/marketreports/2022_ftr_annual_bids_offers.zip",
example_datetime=datetime.datetime(year=2022, month=1, day=1),
example_url="https://docs.misoenergy.org/marketreports/2024_ftr_annual_bids_offers.zip",
example_datetime=datetime.datetime(year=2024, month=1, day=1),
),

"ftr_mpma_results": Report( # TODO review reworked implementation.
Expand Down Expand Up @@ -1730,8 +1730,8 @@ def add_to_datetime(
),
type_to_parse="zip",
parser=parsers.parse_asm_da_co,
example_url="https://docs.misoenergy.org/marketreports/20240729_asm_da_co.zip",
example_datetime=datetime.datetime(year=2024, month=7, day=29),
example_url="https://docs.misoenergy.org/marketreports/20240601_asm_da_co.zip",
example_datetime=datetime.datetime(year=2024, month=6, day=1),
),

"asm_rt_co": Report( # Checked 2024-12-15.
Expand Down Expand Up @@ -1782,8 +1782,8 @@ def add_to_datetime(
),
type_to_parse="zip",
parser=parsers.parse_da_co,
example_url="https://docs.misoenergy.org/marketreports/20241007_da_rpe.xls",
example_datetime=datetime.datetime(year=2024, month=10, day=7),
example_url="https://docs.misoenergy.org/marketreports/20240501_da_rpe.xls",
example_datetime=datetime.datetime(year=2024, month=5, day=1),
),

"cpnode_reszone": Report( # Checked 2024-12-15.
Expand Down
3 changes: 0 additions & 3 deletions MISOReports/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,6 @@ def parse_Resource_Uplift_by_Commitment_Reason(
}
).iloc[:-2]

print(df)
exit(1)

df[["ECONOMIC MAX"]] = df[["ECONOMIC MAX"]].astype("Float64")
df[["LOCAL RESOURCE ZONE"]] = df[["LOCAL RESOURCE ZONE"]].astype("Int64")
df[["STARTTIME"]] = df[["STARTTIME"]].apply(pd.to_datetime, format="%Y/%m/%d %I:%M:%S %p")
Expand Down
64 changes: 33 additions & 31 deletions MISOReports/test_MISOReports.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,52 +23,56 @@ def try_to_get_dfs(
report_name: str,
datetime_increment_limit: int,
number_of_dfs_to_stop_at: int,
) -> list[pd.DataFrame]:
"""Tries to get the df for the report_name and returns it. If a request fails, it will
increment the datetime and try again up to datetime_increment_limit times.
) -> tuple[list[pd.DataFrame], list[datetime.datetime]]:
"""Tries to get the df for the report_name and returns it with its respective
target datetime. If a request fails, it will increment the datetime and try
again up to datetime_increment_limit times.
:param str report_name: The name of the report to get the df for.
:param int datetime_increment_limit: The number of times to try to get the df before raising an error.
:param int number_of_dfs_to_stop_at: The number of successfully downloaded dfs to stop at.
:return pd.DataFrame: The df for the report_name.
:return tuple[list[pd.DataFrame], list[datetime.datetime]]: The dfs and the target dates they were downloaded for.
"""
report_mappings = MISOReports.report_mappings
report = report_mappings[report_name]

increment_cnt = 0
curr_target_date = report.example_datetime
curr_target_datetime = report.example_datetime
dfs = []
target_datetimes = []
while increment_cnt <= datetime_increment_limit:
try:
df = MISOReports.get_df(
report_name=report_name,
ddatetime=curr_target_date,
ddatetime=curr_target_datetime,
)

dfs.append(df)
if not df.empty:
dfs.append(df)
target_datetimes.append(curr_target_datetime)

if len(dfs) >= number_of_dfs_to_stop_at:
break

curr_target_date = report.url_builder.add_to_datetime(
ddatetime=curr_target_date,
curr_target_datetime = report.url_builder.add_to_datetime(
ddatetime=curr_target_datetime,
direction=1,
)
increment_cnt += 1
except requests.HTTPError as e:
curr_target_date = report.url_builder.add_to_datetime(
ddatetime=curr_target_date,
curr_target_datetime = report.url_builder.add_to_datetime(
ddatetime=curr_target_datetime,
direction=1,
)
increment_cnt += 1

if increment_cnt > datetime_increment_limit:
if len(dfs) == 0:
raise ValueError(f"Failed to get a df after {datetime_increment_limit} attempts (last target datetime tried: {curr_target_date}).")
raise ValueError(f"Failed to get a df after {datetime_increment_limit} datetime increments (last target datetime tried: {curr_target_datetime}).")
else:
warnings.warn(f"Only got {len(dfs)}/{number_of_dfs_to_stop_at} dfs after {datetime_increment_limit} attempts (last target datetime tried: {curr_target_date}).")
warnings.warn(f"Only got {len(dfs)}/{number_of_dfs_to_stop_at} dfs after {datetime_increment_limit} attempts (last target datetime tried: {curr_target_datetime}).")

return dfs
return dfs, target_datetimes


def uses_correct_dtypes(
Expand Down Expand Up @@ -248,7 +252,7 @@ def test_MISOMarketReportsURLBuilder_build_url(
url_builder = MISOMarketReportsURLBuilder(
target=target,
supported_extensions=supported_extensions,
url_generator=url_generator
url_generator=url_generator,
)

assert url_builder.build_url(ddatetime=ddatetime, file_extension=file_extension) == expected
Expand Down Expand Up @@ -980,28 +984,28 @@ def test_MISOMarketReportsURLBuilder_build_url(
"report_name, columns_mapping", single_df_test_list
)
def test_get_df_single_df_correct_columns(report_name, columns_mapping, datetime_increment_limit, number_of_dfs_to_stop_at):
dfs = try_to_get_dfs(
dfs, target_datetimes = try_to_get_dfs(
report_name=report_name,
datetime_increment_limit=datetime_increment_limit,
number_of_dfs_to_stop_at=number_of_dfs_to_stop_at,
)

for df in dfs:
for df, target_datetime in zip(dfs, target_datetimes):
columns_mapping_columns = []
for columns_group in columns_mapping.keys():
columns_mapping_columns.extend(columns_group)

columns_mapping_columns_set = frozenset(columns_mapping_columns)
df_columns_set = frozenset(df.columns)

if columns_mapping_columns_set != df_columns_set:
raise ValueError(f"Expected columns {columns_mapping_columns_set} do not match df columns {df_columns_set}.")
assert columns_mapping_columns_set == df_columns_set, \
f"For report {report_name}, expected columns {columns_mapping_columns_set} do not match df columns {df_columns_set}. Target datetime: {target_datetime}."

for columns_tuple, dtype_checker in columns_mapping.items():
columns = list(columns_tuple)

assert uses_correct_dtypes(df, columns, dtype_checker), \
f"For report {report_name}, columns {columns} are not of type {dtype_checker}."
f"For report {report_name}, columns {columns} are not of type {dtype_checker}. Target datetime: {target_datetime}."


multiple_dfs_test_list = [
Expand Down Expand Up @@ -1860,13 +1864,13 @@ def test_get_df_single_df_correct_columns(report_name, columns_mapping, datetime
"report_name, dfs_mapping", multiple_dfs_test_list
)
def test_get_df_multiple_dfs_correct_columns_and_matching_df_names(report_name, dfs_mapping, datetime_increment_limit, number_of_dfs_to_stop_at):
dfs = try_to_get_dfs(
dfs, target_datetimes = try_to_get_dfs(
report_name=report_name,
datetime_increment_limit=datetime_increment_limit,
number_of_dfs_to_stop_at=number_of_dfs_to_stop_at,
)

for df in dfs:
for df, target_datetime in zip(dfs, target_datetimes):
# Check that df names are as expected.
expected_df_names = frozenset(dfs_mapping.keys())
actual_df_names = frozenset(list(df[MULTI_DF_NAMES_COLUMN]))
Expand All @@ -1886,14 +1890,14 @@ def test_get_df_multiple_dfs_correct_columns_and_matching_df_names(report_name,
res_df_columns_set = frozenset(res_df.columns)

# Check that the columns in the df match the expected columns.
if columns_mapping_columns_set != res_df_columns_set:
raise ValueError(f"Expected columns {columns_mapping_columns_set} do not match df columns {res_df_columns_set}.")
assert columns_mapping_columns_set == res_df_columns_set, \
f"Expected columns {columns_mapping_columns_set} do not match df columns {res_df_columns_set}. Target datetime {target_datetime}."

for columns_tuple, dtype_checker in columns_mapping.items():
columns = list(columns_tuple)

assert uses_correct_dtypes(res_df, columns, dtype_checker), \
f"For multi-df report {report_name}, df {df_name}, columns {columns} do not pass {dtype_checker.__name__}."
f"For multi-df report {report_name}, df {df_name}, columns {columns} do not pass {dtype_checker.__name__}. Target datetime {target_datetime}."


def test_get_df_test_test_names_have_no_duplicates(get_df_test_names):
Expand Down Expand Up @@ -2019,13 +2023,13 @@ def test_get_df_ftr_mpma_results_with_changing_columns(report_name, datetime_inc
the same amount of files for each section. Each file within their respective
sections should have the same typing.
"""
dfs = try_to_get_dfs(
dfs, target_datetimes = try_to_get_dfs(
report_name=report_name,
datetime_increment_limit=datetime_increment_limit,
number_of_dfs_to_stop_at=number_of_dfs_to_stop_at,
)

for df in dfs:
for df, target_datetime in zip(dfs, target_datetimes):
for i, name in enumerate(df[MULTI_DF_NAMES_COLUMN]):
if name == "Metadata":
n_files = len(df[MULTI_DF_DFS_COLUMN].iloc[i].columns)
Expand Down Expand Up @@ -2061,8 +2065,7 @@ def test_get_df_ftr_mpma_results_with_changing_columns(report_name, datetime_inc
for i, name in enumerate(df[MULTI_DF_NAMES_COLUMN]):
if name != "Metadata":
reg = re.search(r"File (\d+)", name)
if reg is None:
raise ValueError(f"Expected name to match regex, got {name}.")
assert reg is not None, f"Expected name to match regex, got {name}."

file_number = int(reg.group(1))

Expand All @@ -2072,5 +2075,4 @@ def test_get_df_ftr_mpma_results_with_changing_columns(report_name, datetime_inc

for columns, dtype_checker in types.items():
assert uses_correct_dtypes(df[MULTI_DF_DFS_COLUMN].iloc[i], columns, dtype_checker), \
f"For multi-df report {report_name}, df {name}, columns {columns} do not pass {dtype_checker.__name__}."

f"For multi-df report {report_name}, df {name}, columns {columns} do not pass {dtype_checker.__name__}. Target datetime {target_datetime}."

0 comments on commit 1e97126

Please sign in to comment.