Bug Fix: Pandas loc-chaining (#272)

RHammond2 · web-flow · commit a1175502dcac · 2024-02-09T14:44:15.000-08:00
* add updated hourly offset code

* use updated offsets for pandas

* fix missed offset

* update matched codes

* remove all soon-to-be-deprecated offsets

* swap out outdated offset

* update schemas for new offset usage

* final swap out of offsets

* finish identifying old offsets

* update offsets throughout examples

* convert lingering offsets

* update hardcoded offsets in analysis

* update hardcoded offsets in utils

* fix lingering offsets in tests

* update bokeh dependency

* rerun and fix all but cubico example

* update changelog and setup.py

* update M to ME and MS

* update docs examples

* refactor loc-chaining in example and add missing assignment

* update NA-dataframe deprecation warning

* update results for missing NaN assignment

* update changelog
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,12 @@ All notable changes to this project will be documented in this file. If you make
 `pytest test/unit` or `pytest test/regression`.
 - Converts some configuration files into `pyproject.toml` settings to reduce visual clutter
   at the top-level of the directory.
+- Updates chained `.loc` expressions to be a single `.loc` expression in `project_ENGIE.py` to
+  silence a Pandas deprecation warning about future changes.
+- Adds a missing NaN assignment to `project_ENGIE.py:clean_scada`, which causes a slight change in
+  results for the TIE and wake loss regression tests.
+- `openoa.utils.timeseries.gap_fill_data_frame()` now returns the original data if there is no data
+  to fill in, avoiding a Pandas `concat` deprecation warning about pending behavioral changes.
 
 ## [3.0.1 - 2023-12-22]
 
diff --git a/examples/project_ENGIE.py b/examples/project_ENGIE.py
@@ -96,14 +96,11 @@ def clean_scada(scada_file: str | Path) -> pd.DataFrame:
 
         # Cancel out readings where the wind vane direction repeats more than 3 times in a row
         ix_flag = filters.unresponsive_flag(scada_df.loc[ix_turbine], 3, col=["Va_avg"])
-        scada_df.loc[ix_turbine].loc[ix_flag.values, sensor_cols]
+        scada_df.loc[ix_flag.loc[ix_flag["Va_avg"]].index, sensor_cols] = np.nan
 
         # Cancel out the temperature readings where the value repeats more than 20 times in a row
         ix_flag = filters.unresponsive_flag(scada_df.loc[ix_turbine], 20, col=["Ot_avg"])
-
-        # NOTE: ix_flag is flattened here because as a series it's shape = (N, 1) and
-        # incompatible with this style of indexing, so we need it as shape = (N,)
-        scada_df.loc[ix_turbine, "Ot_avg"].loc[ix_flag.values.flatten()] = np.nan
+        scada_df.loc[ix_flag.loc[ix_flag["Ot_avg"]].index, "Ot_avg"] = np.nan
 
     logger.info("Converting pitch to the range [-180, 180]")
     scada_df.loc[:, "Ba_avg"] = scada_df["Ba_avg"] % 360
diff --git a/openoa/utils/timeseries.py b/openoa/utils/timeseries.py
@@ -209,7 +209,8 @@ def gap_fill_data_frame(data: pd.DataFrame, dt_col: str, freq: str) -> pd.DataFr
 
     gap_df = pd.DataFrame(columns=data.columns)
     gap_df[dt_col] = find_time_gaps(data[dt_col], freq)
-
+    if gap_df.size == 0:
+        return data.sort_values(dt_col)
     return pd.concat([data, gap_df], axis=0).sort_values(dt_col)
 
 
diff --git a/test/regression/turbine_long_term_gross_energy.py b/test/regression/turbine_long_term_gross_energy.py
@@ -66,12 +66,12 @@ def test_longterm_gross_energy_results(self):
 
         # Test UQ case, mean value
         res_uq = self.analysis_uq.plant_gross.mean()
-        check_uq = 13.5355472
+        check_uq = 13.5355463
         npt.assert_almost_equal(res_uq / 1e6, check_uq)
 
         # Test UQ case, stdev
         res_std_uq = self.analysis_uq.plant_gross.std()
-        check_std_uq = 0.12160433
+        check_std_uq = 0.12161093
         npt.assert_almost_equal(res_std_uq / 1e6, check_std_uq)
 
     def tearDown(self):
diff --git a/test/regression/wake_losses.py b/test/regression/wake_losses.py
@@ -115,8 +115,8 @@ def check_simulation_results_wake_losses_without_UQ(self):
         # Make sure wake loss results are consistent to six decimal places
         # Confirm plant-level and turbine-level wake losses for POR and long-term corrected
         # wake loss estimates.
-        expected_results_por = [0.341373, -11.731022, 10.896697, 4.066565, -1.901445]
-        expected_results_lt = [0.366551, -9.720648, 10.275454, 2.925906, -2.043558]
+        expected_results_por = [0.341363, -11.731031, 10.896701, 4.066524, -1.901442]
+        expected_results_lt = [0.366556, -9.720608, 10.275471, 2.925847, -2.043537]
 
         calculated_results_por = [100 * self.analysis.wake_losses_por]
         calculated_results_por += list(100 * np.array(self.analysis.turbine_wake_losses_por))
@@ -133,28 +133,28 @@ def check_simulation_results_wake_losses_with_UQ(self):
         # Confirm plant-level and turbine-level means and std. devs. from Monte Carlo simulation results
         # for POR and long-term corrected wake loss estimates.
         expected_results_por = [
-            0.472843,
-            1.525822,
-            -11.556679,
-            11.021310,
-            4.174661,
-            -1.781930,
-            1.704648,
-            1.367939,
-            1.487695,
-            1.557837,
+            0.472743,
+            1.521414,
+            -11.563967,
+            11.02269,
+            4.175078,
+            -1.776634,
+            1.698539,
+            1.36572,
+            1.484835,
+            1.551052,
         ]
         expected_results_lt = [
-            0.646298,
-            1.368696,
-            -9.434464,
-            10.603648,
-            3.129204,
-            -1.735165,
-            1.535812,
-            1.321793,
-            1.356194,
-            1.420057,
+            0.646731,
+            1.374425,
+            -9.437244,
+            10.615733,
+            3.114511,
+            -1.728213,
+            1.548299,
+            1.325133,
+            1.364934,
+            1.428777,
         ]
 
         calculated_results_por = [
@@ -180,28 +180,28 @@ def check_simulation_results_wake_losses_with_UQ_new_params(self):
         # Confirm plant-level and turbine-level means and std. devs. from Monte Carlo simulation results
         # for POR and long-term corrected wake loss estimates.
         expected_results_por = [
-            0.916847,
-            2.543303,
-            -10.936793,
-            11.132650,
-            5.244208,
-            -1.772678,
-            2.863249,
-            2.275958,
-            2.405864,
-            2.637304,
+            0.917651,
+            2.541353,
+            -10.941171,
+            11.134159,
+            5.245831,
+            -1.768214,
+            2.867614,
+            2.271275,
+            2.404548,
+            2.631516,
         ]
         expected_results_lt = [
-            1.140988,
-            2.420986,
-            -8.820242,
-            10.996425,
-            3.488785,
-            -1.101014,
-            2.496057,
-            2.331142,
-            2.502902,
-            2.430892,
+            1.140835,
+            2.426398,
+            -8.811414,
+            10.995446,
+            3.487754,
+            -1.108443,
+            2.525045,
+            2.318111,
+            2.507327,
+            2.43125,
         ]
 
         calculated_results_por = [