Merge pull request #205 from NREL/develop

FLASC v2.0.1
NREL · Aug 13, 2024 · 686c62d · 686c62d
2 parents deeee8d + 423157c
commit 686c62d
Show file tree

Hide file tree

Showing 35 changed files with 1,700 additions and 727 deletions.
diff --git a/.github/workflows/continuous-integration-workflow.yaml b/.github/workflows/continuous-integration-workflow.yaml
@@ -13,7 +13,7 @@ jobs:
         os: [ubuntu-latest]
 
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v4
       with:
@@ -33,6 +33,11 @@ jobs:
       run: |
         pip install pytest pytest-cov
         coverage run -m pytest tests/
+
+
+    - name: Ruff
+      uses: chartboost/ruff-action@v1
+
     # - name: Upload coverage to coveralls
     #   if: matrix.os == 'ubuntu-latest'
     #   uses: coverallsapp/github-action@v2

diff --git a/.gitignore b/.gitignore
@@ -8,6 +8,7 @@ __pycache__/
 dist
 build
 _build
+_autosummary
 
 # macOS files
 .DS_Store

diff --git a/docs/_toc.yml b/docs/_toc.yml
@@ -10,9 +10,19 @@ parts:
     - file: installation
     - file: getting_started
 
-  - caption: User Referece
+  - caption: User Reference
     chapters:
     # - file: data_processing
     - file: energy_ratio
     - file: total_uplift
     - file: licensing
+
+  # - caption: Developer Reference
+  #   chapters:
+  #   - file: contributing
+  #   - file: development
+  #   - file: testing
+
+  - caption: API Reference
+    chapters:
+    - file: api
diff --git a/docs/api.rst b/docs/api.rst
@@ -0,0 +1,19 @@
+API Documentation
+=================
+
+FLASC is divided into a number of submodules, each of which is documented in detail below. The main submodules are:
+
+* `flasc.analysis`: Contains functions for energy ratio analysis
+* `flasc.data_processing`: Contains functions for processing raw data
+* `flasc.model_fitting`: Contains functions for fitting FLORIS models to data
+* `flasc.utilities`: Contains utility functions for general use
+
+.. autosummary::
+   :toctree: _autosummary
+   :template: custom-module-template.rst
+   :recursive:
+
+    flasc.analysis
+    flasc.model_fitting
+    flasc.data_processing
+    flasc.utilities
diff --git a/docs/installation.md b/docs/installation.md
@@ -41,11 +41,23 @@ in an interactive environment like Jupyter. -->
 
 # Installation
 
-FLASC is currently not available as a package on any repository manager.
-Instead, it must be installed by the user by cloning the GitHub repository.
+FLASC is available as a package on PyPI. We strongly recommend installing FLASC
+into a conda environment. To do this, use the following commands (replacing
+`flasc-env` with the conda environment name of your choosing).
+```bash
+# Create a conda environment containing python and activate it
+conda create --name flasc-env python
+conda activate flasc-env
 
-To download the source code, use `git clone`. Then, add it to
-your Python path with the "local editable install" through `pip`.
+# Pip install flasc and its dependencies from PyPI
+pip install flasc
+```
+That's it, now you're ready to use FLASC.
+
+To download the source code from the GitHub repository (which will also give
+you access to the examples) and install locally,
+use `git clone`. Then, add it to your Python path with the "local editable install" through `pip`. Again, we recommend doing this in a dedicated conda environment
+(see conda commands above).
 
 ```bash
 # Download the source code.

diff --git a/docs/overview.md b/docs/overview.md
@@ -17,7 +17,7 @@ FLASC consists of multiple modules, including:
 
 This module contains functions that support importing and processing raw
 SCADA data files. Data is saved in feather
-format for optimal balance of storage size and load/write speed.
+format for optimal balance of storage size and load and write speed.
 
 Functions include filtering data by wind direction, wind speed and/or TI,
 deriving the ambient conditions from the upstream turbines, all the while
@@ -87,15 +87,15 @@ historical SCADA data of three offshore wind farms.
 If FLASC played a role in your research, please cite it. This software can be
 cited as:
 
-   FLASC. Version 2.0 (2024). Available at https://github.com/NREL/flasc.
+   FLASC. Version 2.0.1 (2024). Available at https://github.com/NREL/flasc.
 
 For LaTeX users:
 
 
     @misc{flasc2024,
       author = {NREL},
-      title = {FLASC. Version 2.0},
-      year = {2022},
+      title = {FLASC. Version 2.0.1},
+      year = {2024},
       publisher = {GitHub},
       journal = {GitHub repository},
       url = {https://github.com/NREL/flasc},

diff --git a/flasc/analysis/energy_ratio.py b/flasc/analysis/energy_ratio.py
@@ -1,3 +1,5 @@
+"""Energy ratio analysis module."""
+
 # This is a work in progress as we try to synthesize ideas from the
 # table based methods and energy ratios back into one thing,
 # some ideas we're incorporating:
@@ -41,8 +43,7 @@ def _compute_energy_ratio_single(
     uplift_absolute=False,
     remove_all_nulls=False,
 ):
-    """
-    Compute the energy ratio between two sets of turbines.
+    """Compute the energy ratio between two sets of turbines.
 
     Args:
         df_ (pl.DataFrame): A dataframe containing the data to use in the calculation.
@@ -61,9 +62,9 @@ def _compute_energy_ratio_single(
             the wind speed and wind direction bins.
         weight_by (str): How to weight the energy ratio, options are 'min', or 'sum'.  'min' means
             the minimum count across the dataframes is used to weight the energy ratio.
-            'sum' means the sum of the counts
-            across the dataframes is used to weight the energy ratio.   Defaults to 'min'.
-        df_freq_pl (pl.Dataframe) Polars dataframe of pre-provided per bin weights
+            'sum' means the sum of the counts across the dataframes is
+            used to weight the energy ratio.  Defaults to 'min'.
+        df_freq_pl (pl.DataFrame): Polars dataframe of pre-provided per bin weights
         wd_bin_overlap_radius (float): The distance in degrees one wd bin
             overlaps into the next, must be
             less than or equal to half the value of wd_step
@@ -82,10 +83,10 @@ def _compute_energy_ratio_single(
             must be available to compute the bin. Defaults to False.
 
     Returns:
-        pl.DataFrame: A dataframe containing the energy ratio for each wind direction bin
-        pl.DataFrame: A dataframe containing the weights each wind direction and wind speed bin
+        A tuple (pl.DataFrame, pl.DataFrame): A dataframe containing the energy ratio for each wind
+            direction bin and a dataframe containing the weights for each wind direction
+            and wind speed bin
     """
-
     # Get the number of dataframes
     num_df = len(df_names)
 
@@ -203,8 +204,7 @@ def _compute_energy_ratio_bootstrap(
     percentiles=[5.0, 95.0],
     remove_all_nulls=False,
 ):
-    """
-    Compute the energy ratio between two sets of turbines with bootstrapping
+    """Compute the energy ratio between two sets of turbines with bootstrapping.
 
     Args:
         er_in (EnergyRatioInput): An EnergyRatioInput object containing
@@ -225,7 +225,7 @@ def _compute_energy_ratio_bootstrap(
             the minimum count across the dataframes is used to weight the energy ratio.
             'sum' means the sum of the counts
             across the dataframes is used to weight the energy ratio.
-        df_freq_pl (pl.Dataframe) Polars dataframe of pre-provided per bin weights
+        df_freq_pl (pl.DataFrame): Polars dataframe of pre-provided per bin weights
         wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps
             into the next, must be
             less than or equal to half the value of wd_step
@@ -251,7 +251,6 @@ def _compute_energy_ratio_bootstrap(
         pl.DataFrame: A dataframe containing the energy ratio between the two sets of turbines.
 
     """
-
     # Otherwise run the function N times and concatenate the results to compute statistics
     er_single_outs = [
         _compute_energy_ratio_single(
@@ -322,8 +321,7 @@ def compute_energy_ratio(
     percentiles=None,
     remove_all_nulls=False,
 ) -> EnergyRatioOutput:
-    """
-    Compute the energy ratio between two sets of turbines with bootstrapping
+    """Compute the energy ratio between two sets of turbines with bootstrapping.
 
     Args:
         er_in (EnergyRatioInput): An EnergyRatioInput object containing
@@ -384,7 +382,6 @@ def compute_energy_ratio(
             ratio between the two sets of turbines.
 
     """
-
     # Get the polars dataframe from within the er_in
     df_ = er_in.get_df()
 

diff --git a/flasc/analysis/energy_ratio_heterogeneity_mapper.py b/flasc/analysis/energy_ratio_heterogeneity_mapper.py
@@ -1,3 +1,5 @@
+"""Module to calculate and visualize the heterogeneity in the inflow wind speed."""
+
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
@@ -40,7 +42,9 @@ def _get_energy_ratio(df, ti, wd_bins, ws_range):
 # extract and plot heterogeneity
 # derived from upstream turbine's power measurements
 class heterogeneity_mapper:
-    """This class is useful to calculate the energy ratios of a set
+    """Class for calculating and visualizing the heterogeneity in the inflow wind speed.
+
+    This class is useful to calculate the energy ratios of a set
     of upstream turbines to then derive the heterogeneity in the
     inflow wind speed. This can be helpful in characterizing the
     ambient wind speed distribution for operational assets where
@@ -51,6 +55,13 @@ class heterogeneity_mapper:
 
     # Private functions
     def __init__(self, df_raw, fm):
+        """Initialize the heterogeneity_mapper class.
+
+        Args:
+            df_raw (pd.DataFrame): The raw SCADA data to use for the analysis.
+            fm (FlorisModel): The FLORIS model
+                to use for the analysis.
+        """
         # Save to self
         self.df_raw = df_raw
         self.fm = fm
@@ -127,13 +138,31 @@ def estimate_heterogeneity(
         wd_bin_width=6.0,
         ws_range=[6.0, 11.0],
     ):
+        """Estimate the heterogeneity in the inflow wind speed.
+
+        Args:
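+            df_upstream (pd.DataFrame, presumably): Upstream-turbine information that is
+                passed unchanged to the per-wind-direction processing step. TODO: confirm
+                the expected columns against the caller.
+            wd_array (array-like, optional): Wind directions to evaluate; one set of
+                energy ratios is computed per entry. Defaults to np.arange(0.0, 360.0, 3.0).
+            wd_bin_width (float, optional): Wind direction bin width used for each
+                evaluated wind direction. Defaults to 6.0.
+            ws_range (list, optional): Wind speed range considered in the analysis.
+                Defaults to [6.0, 11.0].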
+
+        Returns:
+            pd.DataFrame: A dataframe containing the energy ratios for all upstream turbines
+                for each wind direction bin.
+        """
         df_list = [
             self._process_single_wd(wd, wd_bin_width, ws_range, df_upstream) for wd in wd_array
         ]
         self.df_heterogeneity = pd.concat(df_list).reset_index(drop=True)
         return self.df_heterogeneity
 
     def plot_graphs(self, ylim=[0.8, 1.2], pdf_save_path=None):
+        """Plot the energy ratios for all upstream turbines for each wind direction bin.
+
+        Args:
+            ylim (list, optional): The y-axis limits for the plots. Defaults to [0.8, 1.2].
+            pdf_save_path (str, optional): The path to save the plots as a PDF. Defaults to None.
+        """
         if self.df_heterogeneity is None:
             raise UserWarning("Please call 'estimate_heterogeneity(...)' first.")
 
@@ -185,6 +214,11 @@ def plot_graphs(self, ylim=[0.8, 1.2], pdf_save_path=None):
             pdf.close()
 
     def generate_floris_hetmap(self):
+        """Generate a dataframe for a FLORIS heterogeneous map.
+
+        Returns:
+            pd.DataFrame: A dataframe containing the FLORIS heterogeneous map values.
+        """
         if self.df_heterogeneity is None:
             raise UserWarning("Please call 'estimate_heterogeneity(...)' first.")
 
@@ -229,6 +263,17 @@ def generate_floris_hetmap(self):
 
     # # Visualization
     def plot_layout(self, ylim=[0.8, 1.2], plot_background_flow=False, pdf_save_path=None):
+        """Plot the layout of the wind farm with the inflow wind speed heterogeneity.
+
+        Args:
+            ylim (list, optional): The y-axis limits for the plots. Defaults to [0.8, 1.2].
+            plot_background_flow (bool, optional): Whether to plot the background flow.
+                Defaults to False.
+            pdf_save_path (str, optional): The path to save the plots as a PDF. Defaults to None.
+
+        Returns:
+            tuple: The figure and axis objects.
+        """
         if self.df_heterogeneity is None:
             raise UserWarning("Please call 'estimate_heterogeneity(...)' first.")
 

diff --git a/flasc/analysis/energy_ratio_input.py b/flasc/analysis/energy_ratio_input.py
@@ -1,3 +1,5 @@
+"""Energy ratio input module."""
+
 from typing import List
 
 import numpy as np
@@ -8,15 +10,19 @@
 
 
 def generate_block_list(N: int, num_blocks: int = 10):
-    """Generate an np.array of length N where each element is an integer between 0 and num_blocks-1
+    """Generate an np.array of length N where each element is an integer between 0 and num_blocks-1.
+
+    Generate an np.array of length N where each element is an integer between 0 and num_blocks-1
     with each value repeating N/num_blocks times.
 
     Args:
         N (int): Length of the array to generate
-        num_blocks (int): Number of blocks to generate
+        num_blocks (int): Number of blocks to generate. Defaults to 10.
 
-    """
+    Returns:
+        np.array: An array of length N with values between 0 and num_blocks-1
 
+    """
     # Test that N and num_blocks are integers greater than 0
     if not isinstance(N, int) or not isinstance(num_blocks, int):
         raise ValueError("N and num_blocks must be integers")
@@ -34,22 +40,25 @@ def generate_block_list(N: int, num_blocks: int = 10):
 
 
 class EnergyRatioInput:
-    """This class holds the structured inputs for calculating energy ratios"""
+    """EnergyRatioInput class.
+
+    This class holds the structured inputs for calculating energy ratios
+    """
 
     def __init__(
         self,
         df_list_in: List[pd.DataFrame],
         df_names: List[str],
         num_blocks: int = 10,
     ) -> None:
-        """Initialize the EnergyRatioInput class
+        """Initialize the EnergyRatioInput class.
 
         Args:
             df_list_in (List[pd.DataFrame]): A list of pandas dataframes to be concatenated
             df_names (List[str]): A list of names for the dataframes
-            num_blocks (int): The number of blocks to use for the energy ratio calculation
+            num_blocks (int): The number of blocks to use for the energy ratio calculation.
+                Defaults to 10.
         """
-
         # Reduce precision if needed and convert to polars
         df_list = [
             pl.from_pandas(df_reduce_precision(df, allow_convert_to_integer=False))
@@ -86,7 +95,7 @@ def __init__(
         self.num_blocks = num_blocks
 
     def get_df(self) -> pl.DataFrame:
-        """Get the concatenated dataframe
+        """Get the concatenated dataframe.
 
         Returns:
             pl.DataFrame: The concatenated dataframe
@@ -97,14 +106,12 @@ def resample_energy_table(self, perform_resample: bool = True) -> pl.DataFrame:
         """Use the block column of an energy table to resample the data.
 
         Args:
-            df_e_ (pl.DataFrame): An energy table with a block column
-            perform_resample: Boolean, if False returns original energy table
+            perform_resample: Boolean, if False returns original energy table. Defaults to True.
 
         Returns:
             pl.DataFrame: A new energy table with (approximately)
                 the same number of rows as the original
         """
-
         if perform_resample:
             # Generate a random np.array, num_blocks long, where each element is
             #  an integer between 0 and num_blocks-1