Updated documentation.

BrianWeiHaoMa · Dec 22, 2024 · 30640e2 · 30640e2
1 parent 56b1ec6
commit 30640e2
Show file tree

Hide file tree

Showing 6 changed files with 115 additions and 110 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -21,7 +21,7 @@ If you encounter any bugs, please let us know! To make your bug report as effect
 - Steps to reproduce the bug
 - The expected vs. actual behavior
 - Screenshots, error messages, or code snippets if applicable
-- Environment details (e.g., operating system, browser, version)
+- Environment details
 
 You can report bugs by [opening an issue](https://github.com/BrianWeiHaoMa/MISOReports/issues/new?template=bug_report.md). We’ll do our best to address them as soon as possible.
 

diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
@@ -1,32 +1,14 @@
 # Development
 Here you can find the general guidelines for the development of MISOReports.
 
-## Github Workflow
-Before commiting or pushing to github, remember to run these in the terminal and make sure everything passes:
-
-For running all tests:
-```
-pytest 
-```
-
-If you want to skip the completion and long tests:
-```
-pytest -m "not completion and not long"
-```
-
-For checking type annotations:
-```
-mypy --strict .\MISOReports\MISOReports.py 
-```
-
 ## Coding Style
 * We use the one-line-sphinx docstring template from the VS Code extension autoDocstring.
 * Try to keep the style the same as the code that was previously there in all respects (naming schemes, character length per line, etc.) 
 * Try to keep the line length within PEP 8 standards. Exceptions are fine where they make sense.
 
 ## Reports to Pandas Dataframe Mapping Logic
 Remember to make a parsing function in parsers.py and make a new Report entry in MISOReports.report_mappings.
-As well, make sure to add the report's test in single_df_test_list or multiple_dfs_test_list in test_MISOReports.py.
+As well, make sure to add the report's get_df test in test_MISOReports.py.
 Continue to use the same naming scheme as the previous code.
 
 When in doubt, check the entries for previously completed reports.
@@ -37,7 +19,7 @@ Map every single column type to one of the below data pandas types:
 * **Float64** ex. 34.13.
 * **Int64** ex. 34.
 
-When looking at the report, use this checklist:
+When typing Dataframe columns, use this checklist:
 * Ignore null/empty values when deciding with the below guidelines.
 * If there is any string (ex. names, codes, etc.) in the column, the column type should be **string**.
 * Otherwise if the column is clearly meant to portray datetime/date/time, the column type should be **datetime64[ns]**.
@@ -49,7 +31,7 @@ When looking at the report, use this checklist:
 csv
 ```python
 def parse_rt_lmp_prelim(
-    res: requests.Response,
+        res: requests.Response,
 ) -> pd.DataFrame:
     text = res.text
     csv_data = "\n".join(text.splitlines()[4:])
@@ -67,7 +49,7 @@ def parse_rt_lmp_prelim(
 json
 ```python
 def parse_SolarForecast(
-    res: requests.Response,
+        res: requests.Response,
 ) -> pd.DataFrame:
     text = res.text
     dictionary = json.loads(text)
@@ -86,19 +68,23 @@ def parse_SolarForecast(
 zip with a csv file in the extracted folder
 ```python
 def parse_DA_LMPs(
-    res: requests.Response,
+        res: requests.Response,
 ) -> pd.DataFrame:
     with zipfile.ZipFile(file=io.BytesIO(res.content)) as z:
         text = z.read(z.namelist()[0]).decode("utf-8")
 
-    csv_data = "\n".join(text.splitlines()[1:])
+    csv_data = "\n".join(text.lstrip().splitlines())
 
     df = pd.read_csv(
         filepath_or_buffer=io.StringIO(csv_data),
     )
 
     df[["MARKET_DAY"]] = df[["MARKET_DAY"]].apply(pd.to_datetime, format="%m/%d/%Y")
-    df[["HE1", "HE2", "HE3", "HE4", "HE5", "HE6", "HE7", "HE8", "HE9", "HE10", "HE11", "HE12", "HE13", "HE14", "HE15", "HE16", "HE17", "HE18", "HE19", "HE20", "HE21", "HE22", "HE23", "HE24"]] = df[["HE1", "HE2", "HE3", "HE4", "HE5", "HE6", "HE7", "HE8", "HE9", "HE10", "HE11", "HE12", "HE13", "HE14", "HE15", "HE16", "HE17", "HE18", "HE19", "HE20", "HE21", "HE22", "HE23", "HE24"]].astype("Float64")
+
+    float_columns = ["HE1", "HE2", "HE3", "HE4", "HE5", "HE6", "HE7", "HE8", "HE9", "HE10", "HE11", "HE12", "HE13", "HE14", "HE15", "HE16", "HE17", "HE18", "HE19", "HE20", "HE21", "HE22", "HE23", "HE24"]
+    df[float_columns] = df[float_columns].astype("string")
+    df[float_columns] = df[float_columns].apply(lambda x: x.str.replace(',', ''))
+    df[float_columns] = df[float_columns].astype("Float64")
     df[["NODE", "TYPE", "VALUE"]] = df[["NODE", "TYPE", "VALUE"]].astype("string")
 
     return df
@@ -107,7 +93,7 @@ def parse_DA_LMPs(
 excel
 ```python
 def parse_5min_exante_lmp(
-    res: requests.Response,
+        res: requests.Response,
 ) -> pd.DataFrame:
     df = pd.read_excel(
         io=io.BytesIO(res.content),

diff --git a/DOCUMENTATION.md b/DOCUMENTATION.md
@@ -19,7 +19,7 @@ This is the documentation for MISOReports.
         - [Real-Time](#real-time)
         - [Resource Adequacy](#resource-adequacy)
         - [Summary](#summary)
-- [Useful Tricks](#useful-tricks)
+ - [Disclaimer](#disclaimer)
 
 ## Data Types
 All dataframe columns are categorized into one of the following data types:
@@ -248,21 +248,18 @@ def parse_datetime_from_text(text: str):
 # Report link: https://api.misoenergy.org/MISORTWDDataBroker/DataBrokerServices.asmx?messageType=getNAI&returnType=csv.
 report_name = "NAI"
 
-# Download the raw data to get the datetime.
-res = MISOReports.get_response(
+# Download the relevant data.
+data = MISOReports.get_data(
     report_name=report_name,
     file_extension="csv",
 )
+text = data.response.text
+df = data.df
 
 print("Raw data:")
-print(res.text)
+print(text)
 
-# Download the dataframe.
-df = MISOReports.get_df(
-    report_name=report_name,
-)
-
-df["datetime"] = parse_datetime_from_text(res.text)
+df["datetime"] = parse_datetime_from_text(text)
 
 print("Final dataframe:")
 print(df)
@@ -271,12 +268,19 @@ print(df)
 Executing the above gives:
 ```
 Raw data:
-RefId,21-Nov-2024 - Interval 20:50 EST
+RefId,22-Dec-2024 - Interval 17:30 EST
 
 Name,Value
-MISO,-32.77
+MISO,2247.37
 
 Final dataframe:
-   Name  Value            datetime
-0  MISO -32.77 2024-11-21 20:50:00
+   Name    Value            datetime
+0  MISO  2247.37 2024-12-22 17:30:00
 ```
+
+## Disclaimer
+MISO may change the structure of its reports in the future, which could break the current parsers.
+Most of the reports supported by this library do not change very frequently,
+but if you want to make sure that the parsers are updated as quickly as possible, we suggest cloning the repository
+and maintaining the parsers yourself. This way, you can leverage the rest of the library while ensuring that the parsers
+stay up to date.
diff --git a/MISOReports/MISOReports.py b/MISOReports/MISOReports.py
@@ -502,6 +502,7 @@ def get_data(
             url: str | None = None,
             ddatetime: datetime.datetime | None = None,
             timeout: int | None = None,
+            file_extension: str | None = None,
     ) -> Data:
         """Gets the relevant data for the report.
 
@@ -510,6 +511,8 @@ def get_data(
         :param datetime.datetime | None ddatetime: The target datetime to 
             download the report for, defaults to None
         :param int | None timeout: The timeout for the request, defaults to None
+        :param str | None file_extension: The file extension to download, defaults 
+            to None in which case the default file extension is used.
         :return Data: An object containing the DataFrame and the response.
         """
         report = MISOReports.report_mappings[report_name]
@@ -522,7 +525,7 @@ def get_data(
         else:
             response = MISOReports.get_response(
                 report_name=report_name, 
-                file_extension=report.type_to_parse, 
+                file_extension=file_extension, 
                 ddatetime=ddatetime,
                 timeout=timeout,
             )

diff --git a/MISOReports/test_MISOReports.py b/MISOReports/test_MISOReports.py
@@ -84,6 +84,11 @@ def try_to_get_dfs(
                 direction=1,
             )
             increment_cnt += 1
+        except Exception as e:
+            raise Exception(
+                f"Unexpected exception for {report_name} at "
+                + f"{curr_target_datetime}: {e}"
+            )
 
     if increment_cnt > datetime_increment_limit:
         if len(dfs) == 0: