diff --git a/.github/workflows/continuous-integration-workflow.yaml b/.github/workflows/continuous-integration-workflow.yaml index feabd7ba..c733cab8 100644 --- a/.github/workflows/continuous-integration-workflow.yaml +++ b/.github/workflows/continuous-integration-workflow.yaml @@ -13,7 +13,7 @@ jobs: os: [ubuntu-latest] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -33,6 +33,11 @@ jobs: run: | pip install pytest pytest-cov coverage run -m pytest tests/ + + + - name: Ruff + uses: chartboost/ruff-action@v1 + # - name: Upload coverage to coveralls # if: matrix.os == 'ubuntu-latest' # uses: coverallsapp/github-action@v2 diff --git a/.gitignore b/.gitignore index 614a9adc..2df3bdf2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ __pycache__/ dist build _build +_autosummary # macOS files .DS_Store diff --git a/docs/_toc.yml b/docs/_toc.yml index b7d37bb8..f6a2b5c2 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -10,9 +10,19 @@ parts: - file: installation - file: getting_started - - caption: User Referece + - caption: User Reference chapters: # - file: data_processing - file: energy_ratio - file: total_uplift - file: licensing + + # - caption: Developer Reference + # chapters: + # - file: contributing + # - file: development + # - file: testing + + - caption: API Reference + chapters: + - file: api diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 00000000..b4ab1253 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,19 @@ +API Documentation +================= + +FLASC is divided into a number of submodules, each of which is documented in detail below. 
The main submodules are: + +* `flasc.analysis`: Contains functions for energy ratio analysis +* `flasc.data_processing`: Contains functions for processing raw data +* `flasc.model_fitting`: Contains functions for fitting FLORIS models to data +* `flasc.utilities`: Contains utility functions for general use + +.. autosummary:: + :toctree: _autosummary + :template: custom-module-template.rst + :recursive: + + flasc.analysis + flasc.model_fitting + flasc.data_processing + flasc.utilities diff --git a/docs/installation.md b/docs/installation.md index 81d5dac2..086de17f 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -41,11 +41,23 @@ in an interactive environment like Jupyter. --> # Installation -FLASC is currently not available as a package on any repository manager. -Instead, it must be installed by the user by cloning the GitHub repository. +FLASC is available as a package on PyPI. We strongly recommend installing FLASC +into a conda environment. To do this, use the following commands (replacing +`flasc-env` with the conda environment name of your choosing). +```bash +# Create a conda environment containing python and activate it +conda create --name flasc-env python +conda activate flasc-env -To download the source code, use `git clone`. Then, add it to -your Python path with the "local editable install" through `pip`. +# Pip install flasc and its dependencies from PyPI +pip install flasc +``` +That's it, now you're ready to use FLASC. + +To download the source code from the GitHub repository (which will also give +you access to the examples) and install locally, +use `git clone`. Then, add it to your Python path with the "local editable install" through `pip`. Again, we recommend doing this in a dedicated conda environment +(see conda commands above). ```bash # Download the source code. 
diff --git a/docs/overview.md b/docs/overview.md index 757c8e68..50fd7c87 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -17,7 +17,7 @@ FLASC consists of multiple modules, including: This module contains functions that supports importing and processing raw SCADA data files. Data is saved in feather -format for optimal balance of storage size and load/write speed. +format for optimal balance of storage size and load and write speed. Functions include filtering data by wind direction, wind speed an/or TI, deriving the ambient conditions from the upstream turbines, all the while @@ -87,15 +87,15 @@ historical SCADA data of three offshore wind farms. If FLASC played a role in your research, please cite it. This software can be cited as: - FLASC. Version 2.0 (2024). Available at https://github.com/NREL/flasc. + FLASC. Version 2.0.1 (2024). Available at https://github.com/NREL/flasc. For LaTeX users: @misc{flasc2024, author = {NREL}, - title = {FLASC. Version 2.0}, - year = {2022}, + title = {FLASC. Version 2.0.1}, + year = {2024}, publisher = {GitHub}, journal = {GitHub repository}, url = {https://github.com/NREL/flasc}, diff --git a/flasc/analysis/energy_ratio.py b/flasc/analysis/energy_ratio.py index 56cc8420..39c5884d 100644 --- a/flasc/analysis/energy_ratio.py +++ b/flasc/analysis/energy_ratio.py @@ -1,3 +1,5 @@ +"""Energy ratio analysis module.""" + # This is a work in progress as we try to synthesize ideas from the # table based methods and energy ratios back into one thing, # some ideas we're incorporating: @@ -41,8 +43,7 @@ def _compute_energy_ratio_single( uplift_absolute=False, remove_all_nulls=False, ): - """ - Compute the energy ratio between two sets of turbines. + """Compute the energy ratio between two sets of turbines. Args: df_ (pl.DataFrame): A dataframe containing the data to use in the calculation. @@ -61,9 +62,9 @@ def _compute_energy_ratio_single( the wind speed and wind direction bins. 
weight_by (str): How to weight the energy ratio, options are 'min', or 'sum'. 'min' means the minimum count across the dataframes is used to weight the energy ratio. - 'sum' means the sum of the counts - across the dataframes is used to weight the energy ratio. Defaults to 'min'. - df_freq_pl (pl.Dataframe) Polars dataframe of pre-provided per bin weights + 'sum' means the sum of the counts across the dataframes is + used to weight the energy ratio. Defaults to 'min'. + df_freq_pl (pl.Dataframe): Polars dataframe of pre-provided per bin weights wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be less or equal to half the value of wd_step @@ -82,10 +83,10 @@ def _compute_energy_ratio_single( must be available to compute the bin. Defaults to False. Returns: - pl.DataFrame: A dataframe containing the energy ratio for each wind direction bin - pl.DataFrame: A dataframe containing the weights each wind direction and wind speed bin + A tuple (pl.DataFrame, pl.DataFrame): A dataframe containing the energy ratio for each wind + direction bin and a dataframe containing the weights each wind direction + and wind speed bin """ - # Get the number of dataframes num_df = len(df_names) @@ -203,8 +204,7 @@ def _compute_energy_ratio_bootstrap( percentiles=[5.0, 95.0], remove_all_nulls=False, ): - """ - Compute the energy ratio between two sets of turbines with bootstrapping + """Compute the energy ratio between two sets of turbines with bootstrapping. Args: er_in (EnergyRatioInput): An EnergyRatioInput object containing @@ -225,7 +225,7 @@ def _compute_energy_ratio_bootstrap( the minimum count across the dataframes is used to weight the energy ratio. 'sum' means the sum of the counts across the dataframes is used to weight the energy ratio. 
- df_freq_pl (pl.Dataframe) Polars dataframe of pre-provided per bin weights + df_freq_pl (pl.Dataframe): Polars dataframe of pre-provided per bin weights wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be less or equal to half the value of wd_step @@ -251,7 +251,6 @@ def _compute_energy_ratio_bootstrap( pl.DataFrame: A dataframe containing the energy ratio between the two sets of turbines. """ - # Otherwise run the function N times and concatenate the results to compute statistics er_single_outs = [ _compute_energy_ratio_single( @@ -322,8 +321,7 @@ def compute_energy_ratio( percentiles=None, remove_all_nulls=False, ) -> EnergyRatioOutput: - """ - Compute the energy ratio between two sets of turbines with bootstrapping + """Compute the energy ratio between two sets of turbines with bootstrapping. Args: er_in (EnergyRatioInput): An EnergyRatioInput object containing @@ -384,7 +382,6 @@ def compute_energy_ratio( ratio between the two sets of turbines. """ - # Get the polars dataframe from within the er_in df_ = er_in.get_df() diff --git a/flasc/analysis/energy_ratio_heterogeneity_mapper.py b/flasc/analysis/energy_ratio_heterogeneity_mapper.py index 68d32b86..0686c0e1 100644 --- a/flasc/analysis/energy_ratio_heterogeneity_mapper.py +++ b/flasc/analysis/energy_ratio_heterogeneity_mapper.py @@ -1,3 +1,5 @@ +"""Module to calculate and visualize the heterogeneity in the inflow wind speed.""" + import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -40,7 +42,9 @@ def _get_energy_ratio(df, ti, wd_bins, ws_range): # extract and plot heterogeneity # derived from upstream turbine's power measurements class heterogeneity_mapper: - """This class is useful to calculate the energy ratios of a set + """Class for calculating and visualizing the heterogeneity in the inflow wind speed. 
+ + This class is useful to calculate the energy ratios of a set of upstream turbines to then derive the heterogeneity in the inflow wind speed. This can be helpful in characterizing the ambient wind speed distribution for operational assets where @@ -51,6 +55,13 @@ class heterogeneity_mapper: # Private functions def __init__(self, df_raw, fm): + """Initialize the heterogeneity_mapper class. + + Args: + df_raw (pd.DataFrame): The raw SCADA data to use for the analysis. + fm (FlorisModel): The FLORIS model + to use for the analysis. + """ # Save to self self.df_raw = df_raw self.fm = fm @@ -127,6 +138,18 @@ def estimate_heterogeneity( wd_bin_width=6.0, ws_range=[6.0, 11.0], ): + """Estimate the heterogeneity in the inflow wind speed. + + Args: + df_upstream (_type_): _description_ + wd_array (_type_, optional): _description_. Defaults to np.arange(0.0, 360.0, 3.0). + wd_bin_width (float, optional): _description_. Defaults to 6.0. + ws_range (list, optional): _description_. Defaults to [6.0, 11.0]. + + Returns: + pd.DataFrame: A dataframe containing the energy ratios for all upstream turbines + for each wind direction bin. + """ df_list = [ self._process_single_wd(wd, wd_bin_width, ws_range, df_upstream) for wd in wd_array ] @@ -134,6 +157,12 @@ def estimate_heterogeneity( return self.df_heterogeneity def plot_graphs(self, ylim=[0.8, 1.2], pdf_save_path=None): + """Plot the energy ratios for all upstream turbines for each wind direction bin. + + Args: + ylim (list, optional): The y-axis limits for the plots. Defaults to [0.8, 1.2]. + pdf_save_path (str, optional): The path to save the plots as a PDF. Defaults to None. + """ if self.df_heterogeneity is None: raise UserWarning("Please call 'estimate_heterogeneity(...)' first.") @@ -185,6 +214,11 @@ def plot_graphs(self, ylim=[0.8, 1.2], pdf_save_path=None): pdf.close() def generate_floris_hetmap(self): + """Generate a dataframe for a FLORIS heterogeneous map. 
+ + Returns: + pd.DataFrame: A dataframe containing the FLORIS heterogeneous map values. + """ if self.df_heterogeneity is None: raise UserWarning("Please call 'estimate_heterogeneity(...)' first.") @@ -229,6 +263,17 @@ def generate_floris_hetmap(self): # # Visualization def plot_layout(self, ylim=[0.8, 1.2], plot_background_flow=False, pdf_save_path=None): + """Plot the layout of the wind farm with the inflow wind speed heterogeneity. + + Args: + ylim (list, optional): The y-axis limits for the plots. Defaults to [0.8, 1.2]. + plot_background_flow (bool, optional): Whether to plot the background flow. + Defaults to False. + pdf_save_path (str, optional): The path to save the plots as a PDF. Defaults to None. + + Returns: + tuple: The figure and axis objects. + """ if self.df_heterogeneity is None: raise UserWarning("Please call 'estimate_heterogeneity(...)' first.") diff --git a/flasc/analysis/energy_ratio_input.py b/flasc/analysis/energy_ratio_input.py index 0a5ca698..6918bb19 100644 --- a/flasc/analysis/energy_ratio_input.py +++ b/flasc/analysis/energy_ratio_input.py @@ -1,3 +1,5 @@ +"""Energy ratio input module.""" + from typing import List import numpy as np @@ -8,15 +10,19 @@ def generate_block_list(N: int, num_blocks: int = 10): - """Generate an np.array of length N where each element is an integer between 0 and num_blocks-1 + """Generate an np.array of length N where each element is an integer between 0 and num_blocks-1. + + Generate an np.array of length N where each element is an integer between 0 and num_blocks-1 with each value repeating N/num_blocks times. Args: N (int): Length of the array to generate - num_blocks (int): Number of blocks to generate + num_blocks (int): Number of blocks to generate. Defaults to 10. 
- """ + Returns: + np.array: An array of length N with values between 0 and num_blocks-1 + """ # Test than N and num_blocks are integers greater than 0 if not isinstance(N, int) or not isinstance(num_blocks, int): raise ValueError("N and num_blocks must be integers") @@ -34,7 +40,10 @@ def generate_block_list(N: int, num_blocks: int = 10): class EnergyRatioInput: - """This class holds the structured inputs for calculating energy ratios""" + """EnergyRatioInput class. + + This class holds the structured inputs for calculating energy ratios + """ def __init__( self, @@ -42,14 +51,14 @@ def __init__( df_names: List[str], num_blocks: int = 10, ) -> None: - """Initialize the EnergyRatioInput class + """Initialize the EnergyRatioInput class. Args: df_list_in (List[pd.DataFrame]): A list of pandas dataframes to be concatenated df_names (List[str]): A list of names for the dataframes - num_blocks (int): The number of blocks to use for the energy ratio calculation + num_blocks (int): The number of blocks to use for the energy ratio calculation. + Defaults to 10. """ - # Reduce precision if needed and convert to polars df_list = [ pl.from_pandas(df_reduce_precision(df, allow_convert_to_integer=False)) @@ -86,7 +95,7 @@ def __init__( self.num_blocks = num_blocks def get_df(self) -> pl.DataFrame: - """Get the concatenated dataframe + """Get the concatenated dataframe. Returns: pl.DataFrame: The concatenated dataframe @@ -97,14 +106,12 @@ def resample_energy_table(self, perform_resample: bool = True) -> pl.DataFrame: """Use the block column of an energy table to resample the data. Args: - df_e_ (pl.DataFrame): An energy table with a block column - perform_resample: Boolean, if False returns original energy table + perform_resample: Boolean, if False returns original energy table. Defaults to True. 
Returns: pl.DataFrame: A new energy table with (approximately) the same number of rows as the original """ - if perform_resample: # Generate a random np.array, num_blocks long, where each element is # an integer between 0 and num_blocks-1 diff --git a/flasc/analysis/energy_ratio_output.py b/flasc/analysis/energy_ratio_output.py index 5a25df3b..0814cca0 100644 --- a/flasc/analysis/energy_ratio_output.py +++ b/flasc/analysis/energy_ratio_output.py @@ -1,3 +1,5 @@ +"""Store the results of the energy ratio calculations.""" + from __future__ import annotations from typing import Any, Dict, List, Optional, Union @@ -18,8 +20,9 @@ class EnergyRatioOutput: - """This class is used to store the results of the energy ratio calculations - and provide convenient methods for plotting and saving the results. + """Store the results of the energy ratio calculations. + + Additionally provide convenient methods for plotting and saving the results. """ def __init__( @@ -393,7 +396,7 @@ def plot_uplift( show_wind_speed_distribution: bool = True, overlay_frequency: bool = False, ) -> Union[axes.Axes, List[axes.Axes]]: - """Plot the uplift in energy ratio + """Plot the uplift in energy ratio. 
Args: uplift_names_subset (Optional[List[str]], optional): A subset @@ -534,9 +537,7 @@ def plot_uplift( return axarr def _compute_ws_counts(self): - """Compute the of ws bin counts as previously computed but not presently - computed with the energy calculation.""" - + """Compute the ws bin counts.""" # Temporary copy of energy table df_ = self.er_in.get_df() diff --git a/flasc/analysis/total_uplift.py b/flasc/analysis/total_uplift.py index 3dd11d2e..390ebd7c 100644 --- a/flasc/analysis/total_uplift.py +++ b/flasc/analysis/total_uplift.py @@ -1,3 +1,5 @@ +"""Module for computing the total uplift in energy production.""" + # This is a work in progress as we try to synthesize ideas from the # table based methods and energy ratios back into one thing, # some ideas we're incorporating: @@ -42,8 +44,7 @@ def _compute_total_uplift_single( uplift_names=[], remove_all_nulls=False, ): - """ - Compute the total change in energy production between two sets of turbines. + """Compute the total change in energy production between two sets of turbines. Args: df_ (pl.DataFrame): A dataframe containing the data to use in the calculation. @@ -64,7 +65,7 @@ def _compute_total_uplift_single( the minimum count across the dataframes is used to weight the energy ratio. 'sum' means the sum of the counts across the dataframes is used to weight the energy ratio. Defaults to 'min'. - df_freq_pl (pl.Dataframe) Polars dataframe of pre-provided per bin weights + df_freq_pl (pl.Dataframe): Polars dataframe of pre-provided per bin weights wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be less or equal to half the value of wd_step @@ -80,10 +81,11 @@ def _compute_total_uplift_single( must be available to compute the bin. Defaults to False. 
Returns: - dict: A dictionary with results indexed for each element of uplift_names - pl.DataFrame: A dataframe containing the weights each wind direction and wind speed bin + A tuple (dict, pl.DataFrame): containing the results of the computation + and the frequency table. The dictionary contains the uplift results indexed by the + uplift_names. The dataframe contains the weights for each wind direction + and wind speed bin. """ - # Get the number of dataframes num_df = len(df_names) @@ -202,8 +204,7 @@ def _compute_total_uplift_bootstrap( percentiles=[5.0, 95.0], remove_all_nulls=False, ): - """ - Compute the total change in energy production between two sets of turbines with bootstrapping + """Compute the total change in energy between two sets of turbines with bootstrapping. Args: er_in (EnergyRatioInput): An EnergyRatioInput object @@ -224,7 +225,7 @@ def _compute_total_uplift_bootstrap( the minimum count across the dataframes is used to weight the energy ratio. 'sum' means the sum of the counts across the dataframes is used to weight the energy ratio. - df_freq_pl (pl.Dataframe) Polars dataframe of pre-provided per bin weights + df_freq_pl (pl.Dataframe): Polars dataframe of pre-provided per bin weights wd_bin_overlap_radius (float): The distance in degrees one wd bin overlaps into the next, must be less or equal to half the value of wd_step @@ -247,7 +248,6 @@ def _compute_total_uplift_bootstrap( pl.DataFrame: A dataframe containing the energy ratio between the two sets of turbines. """ - # Otherwise run the function N times and concatenate the results to compute statistics uplift_single_outs = [ _compute_total_uplift_single( @@ -333,8 +333,7 @@ def compute_total_uplift( percentiles=None, remove_all_nulls=False, ) -> dict: # dict output for now, may change later - """ - Compute the energy ratio between two sets of turbines with bootstrapping + """Compute the energy ratio between two sets of turbines with bootstrapping. 
Args: er_in (EnergyRatioInput): An EnergyRatioInput object @@ -392,7 +391,6 @@ def compute_total_uplift( two sets of turbines. """ - # Get the polars dataframe from within the er_in df_ = er_in.get_df() diff --git a/flasc/data_processing/dataframe_manipulations.py b/flasc/data_processing/dataframe_manipulations.py index 25a5f809..b4d43be2 100644 --- a/flasc/data_processing/dataframe_manipulations.py +++ b/flasc/data_processing/dataframe_manipulations.py @@ -1,4 +1,5 @@ -# import datetime +"""Module containing methods for FLASC dataframe manipulations.""" + import datetime import os as os import warnings @@ -18,12 +19,30 @@ # Functions related to wind farm analysis for df def filter_df_by_ws(df, ws_range): + """Filter a dataframe by wind speed range. + + Args: + df (pd.DataFrame): Dataframe with measurements. + ws_range ([float, float]): Wind speed range [lower bound, upper bound]. + + Returns: + pd.DataFrame: Filtered dataframe. + """ df = df[df["ws"] >= ws_range[0]] df = df[df["ws"] < ws_range[1]] return df def filter_df_by_wd(df, wd_range): + """Filter a dataframe by wind direction range. + + Args: + df (pd.DataFrame): Dataframe with measurements. + wd_range ([float, float]): Wind direction range [lower bound, upper bound]. + + Returns: + pd.DataFrame: Filtered dataframe. + """ lb = wd_range[0] ub = wd_range[1] @@ -40,17 +59,46 @@ def filter_df_by_wd(df, wd_range): def filter_df_by_ti(df, ti_range): + """Filter a dataframe by turbulence intensity range. + + Args: + df (pd.DataFrame): Dataframe with measurements. + ti_range ([float, float]): Turbulence intensity range [lower bound, upper bound]. + + Returns: + pd.DataFrame: Filtered dataframe. + """ df = df[df["ti"] >= ti_range[0]] df = df[df["ti"] < ti_range[1]] return df def get_num_turbines(df): + """Get the number of turbines in a dataframe. 
+ + Args: + df (pd.DataFrame): Dataframe with turbine data + + Returns: + int: Number of turbines in the dataframe + """ return fsut.get_num_turbines(df) # Generic functions for column operations def get_column_mean(df, col_prefix="pow", turbine_list=None, circular_mean=False): + """Get the mean of a column for a list of turbines. + + Args: + df (pd.Dataframe): Dataframe with measurements. + col_prefix (str, optional): Column prefix to use. Defaults to "pow". + turbine_list ([list, array], optional): List of turbine numbers to use. + If None, all turbines are used. Defaults to None. + circular_mean (bool, optional): Use circular mean. Defaults to False. + + Returns: + np.array: Mean of the column for the specified turbines. + """ if turbine_list is None: turbine_list = range(get_num_turbines(df)) # Assume all turbines elif isinstance(turbine_list, (int, np.integer)): @@ -204,35 +252,40 @@ def _set_col_by_upstream_turbines_in_radius( circular_mean, include_itself=True, ): - """Add a column called [col_out] to your dataframe, which is the + """Add a column of averaged upstream turbine values. + + Add a column called [col_out] to your dataframe, which is the mean of the columns pow_%03d for turbines that are upstream and also within radius [max_radius] of the turbine of interest [turb_no]. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. - df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). - turb_no ([int]): Turbine number from which the radius should be - calculated. + col_out (str): Column name to be added to the dataframe. + col_prefix (str): Column prefix to use. + df (pd.DataFrame): Dataframe with measurements. 
This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + df_upstream (pd.DataFrame): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + turb_no (int): Turbine number from which the radius should be + calculated. + turb_no (int): Turbine number from which the radius should be x_turbs ([list, array]): Array containing x locations of turbines. y_turbs ([list, array]): Array containing y locations of turbines. - max_radius ([float]): Maximum radius for the upstream turbines - until which they are still considered as relevant/used for the - calculation of the averaged column quantity. + max_radius (float): Maximum radius for the upstream turbines + until which they are still considered as relevant/used for the + calculation of the averaged column quantity. + circular_mean (bool): Use circular mean. Defaults to False. include_itself (bool, optional): Include the measurements of turbine - turb_no in the determination of the averaged column quantity. Defaults - to False. + turb_no in the determination of the averaged column quantity. Defaults + to False. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe + df (pd.Dataframe): Dataframe which equals the inserted dataframe plus the additional column called [col_ref]. """ - turbs_in_radius = ftools.get_turbs_in_radius( x_turbs=x_turbs, y_turbs=y_turbs, @@ -259,42 +312,70 @@ def _set_col_by_upstream_turbines_in_radius( # Helper functions def set_wd_by_turbines(df, turbine_numbers): - """Add a column called 'wd' in your dataframe with value equal + """Add WD column by list of turbines. + + Add a column called 'wd' in your dataframe with value equal to the circular-averaged wind direction measurements of all the turbines in turbine_numbers. 
Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. turbine_numbers ([list, array]): List of turbine numbers that - should be used to calculate the column average. + should be used to calculate the column average. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'wd'. + df (pd.DataFrame): Dataframe which equals the inserted dataframe + plus the additional column called 'wd'. """ return _set_col_by_turbines("wd", "wd", df, turbine_numbers, True) def set_wd_by_all_turbines(df): - """Add a column called 'wd' in your dataframe with value equal + """Add a wind direction column using all turbines. + + Add a column called 'wd' in your dataframe with value equal to the circular-averaged wind direction measurements of all turbines. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'wd'. + pd.Dataframe: Dataframe which equals the inserted dataframe + plus the additional column called 'wd'. """ return _set_col_by_turbines("wd", "wd", df, "all", True) def set_wd_by_radius_from_turbine(df, turb_no, x_turbs, y_turbs, max_radius, include_itself=True): + """Add wind direction column by turbines in radius. 
+ + Add a column called 'wd' to your dataframe, which is the + mean of the columns wd_%03d for turbines that are within radius + [max_radius] of the turbine of interest [turb_no]. + + Args: + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + turb_no (int): Turbine number from which the radius should be calculated. + x_turbs ([list, array]): Array containing x locations of turbines. + y_turbs ([list, array]): Array containing y locations of turbines. + max_radius (float): Maximum radius for the upstream turbines + until which they are still considered as relevant/used for the + calculation of the averaged column quantity. + include_itself (bool, optional): Include the measurements of turbine + turb_no in the determination of the averaged column quantity. Defaults + to False. + + Returns: + pd.DataFrame: Dataframe which equals the inserted dataframe + plus the additional column called 'wd'. + """ return _set_col_by_radius_from_turbine( col_out="wd", col_prefix="wd", @@ -309,62 +390,72 @@ def set_wd_by_radius_from_turbine(df, turb_no, x_turbs, y_turbs, max_radius, inc def set_ws_by_turbines(df, turbine_numbers): - """Add a column called 'ws' in your dataframe with value equal - to the circular-averaged wind direction measurements of all - the turbines in turbine_numbers. + """Add ws column by list of turbines. + + Add a column called 'ws' in your dataframe with value equal + to the mean wind speed measurements of all the turbines in + turbine_numbers. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe + df (pd.DataFrame): Dataframe with measurements. This dataframe typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and potentially additional measurements. turbine_numbers ([list, array]): List of turbine numbers that should be used to calculate the column average. 
Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe + df (pd.DataFrame): Dataframe which equals the inserted dataframe plus the additional column called 'ws'. """ return _set_col_by_turbines("ws", "ws", df, turbine_numbers, False) def set_ws_by_all_turbines(df): - """Add a column called 'ws' in your dataframe with value equal + """Add ws column by all turbines. + + Add a column called 'ws' in your dataframe with value equal to the circular-averaged wind direction measurements of all turbines. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. turbine_numbers ([list, array]): List of turbine numbers that - should be used to calculate the column average. + should be used to calculate the column average. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'ws'. + pd.Dataframe: Dataframe which equals the inserted dataframe + plus the additional column called 'ws'. """ return _set_col_by_turbines("ws", "ws", df, "all", False) def set_ws_by_upstream_turbines(df, df_upstream, exclude_turbs=[]): - """Add a column called 'ws' in your dataframe with value equal + """Add wind speed column using upstream turbines. + + Add a column called 'ws' in your dataframe with value equal to the averaged wind speed measurements of all the turbines upstream, excluding the turbines listed in exclude_turbs. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. 
- df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + df_upstream (pd.DataFrame): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + exclude_turbs ([list, array]): array-like variable containing + turbine indices that should be excluded in determining the column + mean quantity. exclude_turbs ([list, array]): array-like variable containing - turbine indices that should be excluded in determining the column - mean quantity. + turbine indices that should be excluded in determining the column + mean quantity. + Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'ws'. + pd.Dataframe: Dataframe which equals the inserted dataframe + plus the additional column called 'ws'. """ return _set_col_by_upstream_turbines( col_out="ws", @@ -379,32 +470,35 @@ def set_ws_by_upstream_turbines(df, df_upstream, exclude_turbs=[]): def set_ws_by_upstream_turbines_in_radius( df, df_upstream, turb_no, x_turbs, y_turbs, max_radius, include_itself=True ): - """Add a column called 'ws' to your dataframe, which is the + """Add wind speed column using in-radius upstream turbines. + + Add a column called 'ws' to your dataframe, which is the mean of the columns pow_%03d for turbines that are upstream and also within radius [max_radius] of the turbine of interest [turb_no]. Args: - df ([pd.DataFrame]): Dataframe with measurements. 
This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. - df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). - turb_no ([int]): Turbine number from which the radius should be - calculated. + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + df_upstream (pd.DataFrame): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + turb_no (int): Turbine number from which the radius should be + calculated. + turb_no (int): Turbine number from which the radius should be x_turbs ([list, array]): Array containing x locations of turbines. y_turbs ([list, array]): Array containing y locations of turbines. - max_radius ([float]): Maximum radius for the upstream turbines - until which they are still considered as relevant/used for the - calculation of the averaged column quantity. + max_radius (float): Maximum radius for the upstream turbines + until which they are still considered as relevant/used for the + calculation of the averaged column quantity. include_itself (bool, optional): Include the measurements of turbine - turb_no in the determination of the averaged column quantity. Defaults - to False. + turb_no in the determination of the averaged column quantity. Defaults + to False. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe + pd.Dataframe: Dataframe which equals the inserted dataframe plus the additional column called 'ws'. 
""" return _set_col_by_upstream_turbines_in_radius( @@ -424,31 +518,31 @@ def set_ws_by_upstream_turbines_in_radius( def set_ws_by_n_closest_upstream_turbines( df, df_upstream, turb_no, x_turbs, y_turbs, exclude_turbs=[], N=5 ): - """Add a column called 'pow_ref' to your dataframe, which is the - mean of the columns pow_%03d for the 5 closest turbines that are + """Add wind speed column by N closest upstream turbines. + + Add a column called 'ws' to your dataframe, which is the + mean of the columns ws_%03d for the N closest turbines that are upstream of the turbine of interest [turb_no]. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. - df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). - turb_no ([int]): Turbine number from which the radius should be - calculated. + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + df_upstream (pd.DataFrame): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + turb_no (int): Turbine number from which the radius should be + calculated. + turb_no (int): Turbine number from which the radius should be x_turbs ([list, array]): Array containing x locations of turbines. y_turbs ([list, array]): Array containing y locations of turbines. 
- max_radius ([float]): Maximum radius for the upstream turbines - until which they are still considered as relevant/used for the - calculation of the averaged column quantity. - include_itself (bool, optional): Include the measurements of turbine - turb_no in the determination of the averaged column quantity. Defaults - to False. - - Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe + exclude_turbs ([list, array]): array-like variable containing + turbine indices that should be excluded in determining the column + mean quantity. + N (int): Number of closest turbines to consider for the calculation + Returns: + pd.Dataframe: Dataframe which equals the inserted dataframe plus the additional column called 'pow_ref'. """ return _set_col_by_n_closest_upstream_turbines( @@ -466,62 +560,72 @@ def set_ws_by_n_closest_upstream_turbines( def set_ti_by_turbines(df, turbine_numbers): - """Add a column called 'ti' in your dataframe with value equal + """Add TI column by list of turbines. + + Add a column called 'ti' in your dataframe with value equal to the averaged turbulence intensity measurements of all the turbines listed in turbine_numbers. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. turbine_numbers ([list, array]): List of turbine numbers that - should be used to calculate the column average. + should be used to calculate the column average. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'ti'. + pd.Dataframe: Dataframe which equals the inserted dataframe + plus the additional column called 'ti'. 
""" return _set_col_by_turbines("ti", "ti", df, turbine_numbers, False) def set_ti_by_all_turbines(df): - """Add a column called 'ti' in your dataframe with value equal + """Add TI column using all turbines. + + Add a column called 'ti' in your dataframe with value equal to the averaged turbulence intensity measurements of all turbines. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. + df (pd.Dataframe): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. turbine_numbers ([list, array]): List of turbine numbers that - should be used to calculate the column average. + should be used to calculate the column average. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe + df (pd.Dataframe): Dataframe which equals the inserted dataframe plus the additional column called 'ti'. """ return _set_col_by_turbines("ti", "ti", df, "all", False) def set_ti_by_upstream_turbines(df, df_upstream, exclude_turbs=[]): - """Add a column called 'ti' in your dataframe with value equal + """Add TI column using upstream turbines. + + Add a column called 'ti' in your dataframe with value equal to the averaged turbulence intensity measurements of all the turbines upstream, excluding the turbines listed in exclude_turbs. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. - df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + df (pd.Dataframe): Dataframe with measurements. 
This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + df_upstream (pd.Dataframe): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + exclude_turbs ([list, array]): array-like variable containing + turbine indices that should be excluded in determining the column + mean quantity. exclude_turbs ([list, array]): array-like variable containing - turbine indices that should be excluded in determining the column - mean quantity. + turbine indices that should be excluded in determining the column + mean quantity. + Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'ti'. + pd.Dataframe: Dataframe which equals the inserted dataframe + plus the additional column called 'ti'. """ return _set_col_by_upstream_turbines( col_out="ti", @@ -536,32 +640,35 @@ def set_ti_by_upstream_turbines(df, df_upstream, exclude_turbs=[]): def set_ti_by_upstream_turbines_in_radius( df, df_upstream, turb_no, x_turbs, y_turbs, max_radius, include_itself=True ): - """Add a column called 'ti' to your dataframe, which is the + """Add TI column by upstream turbines within a radius. + + Add a column called 'ti' to your dataframe, which is the mean of the columns ti_%03d for turbines that are upstream and also within radius [max_radius] of the turbine of interest [turb_no]. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. - df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. 
This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). - turb_no ([int]): Turbine number from which the radius should be - calculated. + df (pd.Dataframe): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + df_upstream (pd.Dataframe): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + turb_no (int): Turbine number from which the radius should be + calculated. + turb_no (int): Turbine number from which the radius should be x_turbs ([list, array]): Array containing x locations of turbines. y_turbs ([list, array]): Array containing y locations of turbines. - max_radius ([float]): Maximum radius for the upstream turbines - until which they are still considered as relevant/used for the - calculation of the averaged column quantity. + max_radius (float): Maximum radius for the upstream turbines + until which they are still considered as relevant/used for the + calculation of the averaged column quantity. include_itself (bool, optional): Include the measurements of turbine - turb_no in the determination of the averaged column quantity. Defaults - to False. + turb_no in the determination of the averaged column quantity. Defaults + to False. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe + pd.Dataframe: Dataframe which equals the inserted dataframe plus the additional column called 'ti'. """ return _set_col_by_upstream_turbines_in_radius( @@ -579,43 +686,49 @@ def set_ti_by_upstream_turbines_in_radius( def set_pow_ref_by_turbines(df, turbine_numbers): - """Add a column called 'pow_ref' in your dataframe with value equal + """Add power reference column by list of turbines. 
+ + Add a column called 'pow_ref' in your dataframe with value equal to the averaged turbulence intensity measurements of all the turbines listed in turbine_numbers. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. turbine_numbers ([list, array]): List of turbine numbers that - should be used to calculate the column average. + should be used to calculate the column average. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'ti'. + pd.DataFrame: Dataframe which equals the inserted dataframe + plus the additional column called 'pow_ref'. """ return _set_col_by_turbines("pow_ref", "pow", df, turbine_numbers, False) def set_pow_ref_by_upstream_turbines(df, df_upstream, exclude_turbs=[]): - """Add a column called 'pow_ref' in your dataframe with value equal + """Add pow_ref column using upstream turbines. + + Add a column called 'pow_ref' in your dataframe with value equal to the averaged power measurements of all the turbines upstream, excluding the turbines listed in exclude_turbs. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. - df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + df (pd.DataFrame): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. 
+ df_upstream (pd.Dataframe): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). exclude_turbs ([list, array]): array-like variable containing - turbine indices that should be excluded in determining the column - mean quantity. + turbine indices that should be excluded in determining the column + mean quantity. + + Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'pow_ref'. + pd.Dataframe: Dataframe which equals the inserted dataframe + plus the additional column called 'pow_ref'. """ return _set_col_by_upstream_turbines( col_out="pow_ref", @@ -630,33 +743,36 @@ def set_pow_ref_by_upstream_turbines(df, df_upstream, exclude_turbs=[]): def set_pow_ref_by_upstream_turbines_in_radius( df, df_upstream, turb_no, x_turbs, y_turbs, max_radius, include_itself=False ): - """Add a column called 'pow_ref' to your dataframe, which is the + """Add pow_ref column using upstream turbines within a radius. + + Add a column called 'pow_ref' to your dataframe, which is the mean of the columns pow_%03d for turbines that are upstream and also within radius [max_radius] of the turbine of interest [turb_no]. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. - df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). - turb_no ([int]): Turbine number from which the radius should be - calculated. + df (pd.Dataframe): Dataframe with measurements. 
This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + df_upstream (pd.DataFrame): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + turb_no (int): Turbine number from which the radius should be + calculated. + turb_no (int): Turbine number from which the radius should be x_turbs ([list, array]): Array containing x locations of turbines. y_turbs ([list, array]): Array containing y locations of turbines. - max_radius ([float]): Maximum radius for the upstream turbines - until which they are still considered as relevant/used for the - calculation of the averaged column quantity. + max_radius (float): Maximum radius for the upstream turbines + until which they are still considered as relevant/used for the + calculation of the averaged column quantity. include_itself (bool, optional): Include the measurements of turbine - turb_no in the determination of the averaged column quantity. Defaults - to False. + turb_no in the determination of the averaged column quantity. Defaults + to False. Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'pow_ref'. + pd.DataFrame: Dataframe which equals the inserted dataframe + plus the additional column called 'pow_ref'. """ return _set_col_by_upstream_turbines_in_radius( col_out="pow_ref", @@ -675,32 +791,33 @@ def set_pow_ref_by_upstream_turbines_in_radius( def set_pow_ref_by_n_closest_upstream_turbines( df, df_upstream, turb_no, x_turbs, y_turbs, exclude_turbs=[], N=5 ): - """Add a column called 'pow_ref' to your dataframe, which is the - mean of the columns pow_%03d for the 5 closest turbines that are + """Add pow_ref column using N-nearest upstream turbines. 
+ + Add a column called 'pow_ref' to your dataframe, which is the + mean of the columns pow_%03d for the N closest turbines that are upstream of the turbine of interest [turb_no]. Args: - df ([pd.DataFrame]): Dataframe with measurements. This dataframe - typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and - potentially additional measurements. - df_upstream ([pd.DataFrame]): Dataframe containing rows indicating - wind direction ranges and the corresponding upstream turbines for - that wind direction range. This variable can be generated with - flasc.utilities.floris_tools.get_upstream_turbs_floris(...). - turb_no ([int]): Turbine number from which the radius should be - calculated. + df (pd.Dataframe): Dataframe with measurements. This dataframe + typically consists of wd_%03d, ws_%03d, ti_%03d, pow_%03d, and + potentially additional measurements. + df_upstream (pd.Dataframe): Dataframe containing rows indicating + wind direction ranges and the corresponding upstream turbines for + that wind direction range. This variable can be generated with + flasc.utilities.floris_tools.get_upstream_turbs_floris(...). + turb_no (int): Turbine number from which the radius should be + calculated. x_turbs ([list, array]): Array containing x locations of turbines. y_turbs ([list, array]): Array containing y locations of turbines. - max_radius ([float]): Maximum radius for the upstream turbines - until which they are still considered as relevant/used for the - calculation of the averaged column quantity. - include_itself (bool, optional): Include the measurements of turbine - turb_no in the determination of the averaged column quantity. Defaults - to False. + exclude_turbs ([list, array]): array-like variable containing + turbine indices that should be excluded in determining the column + mean quantity. + N (int): Number of closest turbines to consider for the calculation + of the averaged column quantity. Defaults to 5. 
- Returns: - df ([pd.DataFrame]): Dataframe which equals the inserted dataframe - plus the additional column called 'pow_ref'. + Returns: + pd.Dataframe: Dataframe which equals the inserted dataframe + plus the additional column called 'pow_ref'. """ return _set_col_by_n_closest_upstream_turbines( col_out="pow_ref", @@ -717,7 +834,9 @@ def set_pow_ref_by_n_closest_upstream_turbines( def df_reduce_precision(df_in, verbose=False, allow_convert_to_integer=True): - """Reduce the precision in dataframes from float64 to float32, or possibly + """Reduce dataframe precision. + + Reduce the precision in dataframes from float64 to float32, or possibly even further to int32, int16, int8 or even bool. This operation typically reduces the size of the dataframe by a factor 2 without any real loss in precision. This can make particular operations and data storage much more @@ -725,13 +844,13 @@ def df_reduce_precision(df_in, verbose=False, allow_convert_to_integer=True): these variables. Args: - df_in ([pd.DataFrame]): Dataframe that needs to be reduced. + df_in (pd.Dataframe): Dataframe that needs to be reduced. verbose (bool, optional): Print progress. Defaults to False. allow_convert_to_integer (bool, optional): Allow reduction to integer type if possible. Defaults to True. Returns: - df_out ([pd.DataFrame]): Reduced dataframe + pd.Dataframe: Reduced dataframe """ list_out = [] dtypes = df_in.dtypes @@ -791,10 +910,18 @@ def df_reduce_precision(df_in, verbose=False, allow_convert_to_integer=True): # Functions used for dataframe processing specifically def df_drop_nan_rows(df, verbose=False): - """Remove entries in dataframe where all rows (besides 'time') + """Drop all-nan rows. + + Remove entries in dataframe where all rows (besides 'time') have nan values. - """ + Args: + df (pd.Dataframe): Input pandas dataframe + verbose (bool, optional): Print progress. Defaults to False. 
+ + Returns: + pd.Dataframe: Dataframe with all-nan rows removed + """ N_init = df.shape[0] colnames = [c for c in df.columns if c not in ["time", "turbid", "index"]] df = df.dropna(axis=0, subset=colnames, how="all") @@ -806,7 +933,9 @@ def df_drop_nan_rows(df, verbose=False): def df_find_and_fill_data_gaps_with_missing(df, missing_data_buffer=5.0): - """This function takes a pd.DataFrame object and look for large jumps in + """Find and fill data gap and mark as missing data with NaN. + + This function takes a pd.DataFrame object and look for large jumps in the 'time' column. Rather than simply interpolating these values using a ZOH, this rather indicates that measurements are missing. Hence, this function finds these time gaps and inserts an additional row @@ -815,16 +944,15 @@ def df_find_and_fill_data_gaps_with_missing(df, missing_data_buffer=5.0): will be ignored in any further analysis. Args: - df ([pd.DataFrame]): Merged dataframe for all imported files + df (pd.Dataframe): Merged dataframe for all imported files missing_data_buffer (int, optional): If the time gaps are equal or - larger than this limit [s], then it will consider the data as - corrupted or missing. Defaults to 10. + larger than this limit [s], then it will consider the data as + corrupted or missing. Defaults to 10. Returns: - df ([pd.DataFrame]): The postprocessed dataframe where all data - within large time gaps hold value 'missing'. + pd.Dataframe: The postprocessed dataframe where all data + within large time gaps hold value 'missing'. """ - df = df.sort_values(by="time") time_values = df["time"].values @@ -879,14 +1007,13 @@ def df_sort_and_find_duplicates(df): """This function sorts the dataframe and finds rows with equal time index. 
Args: - df ([pd.DataFrame]): An (unsorted) dataframe + df (pd.Dataframe): An (unsorted) dataframe Returns: - df ([pd.DataFrame]): Dataframe sorted by time + pd.Dataframe: Dataframe sorted by time duplicate_entries_idx ([list of int]): list with indices of the former - of two duplicate rows. The indices correspond to the time-sorted df. + of two duplicate rows. The indices correspond to the time-sorted df. """ - df = df.sort_values(axis=0, by="time", ignore_index=True) time_delta = np.diff(df["time"].values) duplicate_entries_idx = np.where(np.abs(np.float64(time_delta)) < 1e-3)[0] @@ -907,7 +1034,8 @@ def is_day_or_night( lag_hours: float = 0, datetime_column: str = "time", ): - """ + """Determine night or day in dataframe. + Determine whether it's day or night for a given set of coordinates and UTC timestamp in a DataFrame. @@ -931,7 +1059,6 @@ def is_day_or_night( and 'is_day' (a boolean indicating whether it's daytime at the given timestamp). """ - import ephem # Import here so don't use the memory if not calling this function # Create an Observer with the given latitude and longitude @@ -965,8 +1092,7 @@ def sun_alt(row): def plot_sun_altitude_with_day_night_color(df: pd.DataFrame, ax: plt.axis = None): - """ - Plot Sun Altitude with Day-Night Color Differentiation. + """Plot sun altitude with day-night color differentiation. This function creates a plot of Sun Altitude over time, distinguishing between day and night periods @@ -977,7 +1103,7 @@ def plot_sun_altitude_with_day_night_color(df: pd.DataFrame, ax: plt.axis = None Args: df (pd.DataFrame): A DataFrame containing time, sun_altitude, and is_day columns. ax (plt.axis, optional): An optional Matplotlib axis to use for the plot. - If not provided, a new axis will be created. + If not provided, a new axis will be created. Returns: ax (plt.axis): The Matplotlib axis plotted on. 
@@ -1028,30 +1154,34 @@ def plot_sun_altitude_with_day_night_color(df: pd.DataFrame, ax: plt.axis = None return ax -def make_df_wide(df): - df["turbid"] = df["turbid"].astype(int) - df = df.reset_index(drop=False) - if "index" in df.columns: - df = df.drop(columns="index") - df = df.set_index(["time", "turbid"], drop=True) - df = df.unstack() - df.columns = ["%s_%s" % c for c in df.columns] - df = df.reset_index(drop=False) - return df +# TODO: This function is not referenced and doesn't connect to current code really? +# Going to comment out rather than add docstring +# def make_df_wide(df): +# df["turbid"] = df["turbid"].astype(int) +# df = df.reset_index(drop=False) +# if "index" in df.columns: +# df = df.drop(columns="index") +# df = df.set_index(["time", "turbid"], drop=True) +# df = df.unstack() +# df.columns = ["%s_%s" % c for c in df.columns] +# df = df.reset_index(drop=False) +# return df def df_sort_and_fix_duplicates(df): - """This function sorts the dataframe and addresses duplicate rows (i.e., + """Sort dataframe and fill duplicates. + + This function sorts the dataframe and addresses duplicate rows (i.e., rows in which the time index is equal). It does this by merging the two rows, replacing the 'nan' entries of one row with the non-'nan' entries of the other row. If someone both rows have different values for the same column, then an exception is thrown. Args: - df ([pd.DataFrame]): An (unsorted) dataframe + df (pd.Dataframe): An (unsorted) dataframe Returns: - df ([pd.DataFrame]): A time-sorted Dataframe in which its duplicate + df (pd.Dataframe): A time-sorted Dataframe in which its duplicate rows have been merged. 
""" # Check and merge any duplicate entries in the dataset diff --git a/flasc/data_processing/energy_ratio_wd_bias_estimation.py b/flasc/data_processing/energy_ratio_wd_bias_estimation.py index df607529..0e1dec78 100644 --- a/flasc/data_processing/energy_ratio_wd_bias_estimation.py +++ b/flasc/data_processing/energy_ratio_wd_bias_estimation.py @@ -1,3 +1,5 @@ +"""Module to estimate the wind direction bias.""" + import os as os from typing import Callable, List @@ -15,7 +17,9 @@ class bias_estimation(LoggingManager): - """This class can be used to estimate the bias (offset) in a wind + """Class to determine bias in wind direction measurement. + + This class can be used to estimate the bias (offset) in a wind direction measurement by comparing the energy ratios in the SCADA data with the predicted energy ratios from FLORIS under various bias correction values. Essentially, this class solves the following @@ -38,7 +42,7 @@ def __init__( """Initialize the bias estimation class. Args: - df ([pd.DataFrame]): Dataframe with the SCADA data measurements + df (pd.Dataframe): Dataframe with the SCADA data measurements formatted in the generic format. The dataframe should contain at the minimum the following columns: * Reference wind direction for the test turbine, 'wd' @@ -46,7 +50,7 @@ def __init__( * Power production of every turbine: pow_000, pow_001, ... * Reference power production used to normalize the energy ratio: 'pow_ref' - df_fm_approx ([pd.DataFrame]): Dataframe containing a large set + df_fm_approx (pd.Dataframe): Dataframe containing a large set of precomputed solutions of the FLORIS model for a range of wind directions, wind speeds (and optionally turbulence intensities). This table can be generated using the following: @@ -84,7 +88,9 @@ def _load_er_input_for_wd_bias( self, wd_bias, ): - """This function initializes an instance of the EnergyRatioInput + """Load EnergyRatioInput objects with bias. 
+ + This function initializes an instance of the EnergyRatioInput where the dataframe is shifted by wd_bias for each test turbine. This facilitates the calculation of the energy ratios under this hypothesized wind direction bias. Additionally, the FLORIS predictions @@ -92,7 +98,7 @@ def _load_er_input_for_wd_bias( predictions are also calculated. Args: - wd_bias ([float]): Hypothesized wind direction bias in degrees. + wd_bias (float): Hypothesized wind direction bias in degrees. test_turbines ([iteratible]): List of test turbines for which each the energy ratios are calculated and the Pearson correlation coefficients are calculated. Note that this @@ -160,10 +166,26 @@ def _get_energy_ratios_allbins( N_btstrp=1, plot_iter_path=None, ): - """Calculate the energy ratios for the energy_ratio_suite objects - contained in 'self.fsc_list'. + """Calculate the energy ratios. Args: + wd_bias (float): Wind direction bias in degrees. + time_mask ([iterable], optional): Mask. If None, will not mask + the data based on this variable. Defaults to None. + ws_mask ([iterable], optional): Wind speed mask. Should be an + iterable of length 2, e.g., [6.0, 10.0], defining the lower + and upper bound, respectively. If not specified, will not + mask the data based on this variable. Defaults to (6, 10). + wd_mask ([iterable], optional): Wind direction mask. Should + be an iterable of length 2, e.g., [0.0, 180.0], defining + the lower and upper bound, respectively. If not specified, + will not mask the data based on this variable. Defaults to + None. + ti_mask ([iterable], optional): Turbulence intensity mask. + Should be an iterable of length 2, e.g., [0.04, 0.08], + defining the lower and upper bound, respectively. If not + specified, will not mask the data based on this variable. + Defaults to None. wd_step (float, optional): Wind direction discretization step size. This defines for what wind directions the energy ratio is to be calculated. 
Note that this does not necessarily @@ -280,6 +302,31 @@ def calculate_baseline( er_wd_bin_width=None, er_N_btstrp=1, ): + """Calculate Baseline energy ratios. + + Args: + time_mask ([iterable], optional): Time Mask. + ws_mask ([iterable], optional): Wind speed mask. Should be an + iterable of length 2, e.g., [6.0, 10.0], defining the lower + and upper bound, respectively. If not specified, will not + mask the data based on this variable. Defaults to (6, 10). + wd_mask ([iterable], optional): Wind direction mask. Should + be an iterable of length 2, e.g., [0.0, 180.0], defining + the lower and upper bound, respectively. If not specified, + will not mask the data based on this variable. Defaults to + None. + ti_mask ([iterable], optional): Turbulence intensity mask. + Should be an iterable of length 2, e.g., [0.04, 0.08], + defining the lower and upper bound, respectively. If not + specified, will not mask the data based on this variable. + Defaults to None. + er_wd_step (float, optional): Wind direction step size. Defaults to 3.0. + er_ws_step (float, optional): Wind speed step size. Defaults to 5.0. + er_wd_bin_width ([type], optional): Wind direction bin width. Defaults to None. + er_N_btstrp (int, optional): Number of bootstrap evaluations for + uncertainty quantification (UQ). If N_btstrp=1, will not + perform any uncertainty quantification. Defaults to 1. + """ # TODO: is this calculate_baseline method needed? self._get_energy_ratios_allbins( wd_bias=0.0, @@ -309,11 +356,13 @@ def estimate_wd_bias( er_N_btstrp=1, plot_iter_path=None, ): - """Estimate the wind direction bias by comparing the SCADA data + """Estimate wd bias. + + Estimate the wind direction bias by comparing the SCADA data under various wind direction corrections to its FLORIS predictions. Args: - time_mask ([iterable], optional): Wind speed mask. Should be an + time_mask ([iterable], optional): Time mask. 
Should be an iterable of length 2, e.g., [pd.to_datetime("2019-01-01"), pd.to_datetime("2019-04-01")], defining the lower and upper bound, respectively. If not specified, will not mask the data @@ -336,6 +385,8 @@ def estimate_wd_bias( direction offsets to consider. Defaults to (-180., 180.). opt_search_brute_dx (float, optional): Number of points to discretize the search space over. Defaults to 5. + opt_workers (int, optional): Number of workers to use for the + optimization. Defaults to 4. er_wd_step (float, optional): Wind direction discretization step size. This defines for what wind directions the energy ratio is to be calculated. Note that this does not necessarily @@ -361,8 +412,8 @@ def estimate_wd_bias( None. Returns: - x_opt ([float]): Optimal wind direction offset. - J_opt ([float]): Cost function under optimal offset. + A tuple (float, float): The optimal wind direction offset and + the cost function under the optimal offset. """ self.logger.info("Estimating the wind direction bias") @@ -455,11 +506,13 @@ def opt_finish(func, x0, args=()): def plot_energy_ratios( self, show_uncorrected_data=False, save_path=None, format="png", dpi=200 ): - """Plot the energy ratios for the currently evaluated wind + """Plot energy ratios. + + Plot the energy ratios for the currently evaluated wind direction offset term. Args: - show_uncorrcted_data (bool, optional): Compute and show the + show_uncorrected_data (bool, optional): Compute and show the uncorrected energy ratio (with wd_bias=0) on the plot. Defaults to False. save_path ([str], optional): Path to save the figure to. 
If not diff --git a/flasc/data_processing/filtering.py b/flasc/data_processing/filtering.py index 75c02d79..e20171f9 100644 --- a/flasc/data_processing/filtering.py +++ b/flasc/data_processing/filtering.py @@ -1,3 +1,6 @@ +"""Implement filtering class and functions for FLASC data.""" + + import itertools import matplotlib.pyplot as plt @@ -18,16 +21,47 @@ def df_get_no_faulty_measurements(df, turbine): + """Get the number of faulty measurements for a specific turbine. + + Args: + df (pd.DataFrame): Dataframe containing the turbine data, + formatted in the generic SCADA data format. Namely, the + dataframe should at the very least contain the columns: + * Time of each measurement: time + * Wind speed of each turbine: ws_000, ws_001, ... + * Power production of each turbine: pow_000, pow_001, ... + turbine (int): The turbine identifier for which the number of + faulty measurements should be counted. + + Returns: + N_isnan (int): Number of faulty measurements for the turbine. + """ if isinstance(turbine, str): turbine = int(turbine) entryisnan = np.isnan(df["pow_%03d" % turbine].astype(float)) - # cols = [s for s in df.columns if s[-4::] == ('_%03d' % turbine)] - # entryisnan = (np.sum(np.isnan(df[cols]),axis=1) > 0) N_isnan = np.sum(entryisnan) return N_isnan def df_mark_turbdata_as_faulty(df, cond, turbine_list, exclude_columns=[]): + """Mark turbine data as faulty based on a condition. + + Args: + df (pd.DataFrame): Dataframe containing the turbine data, + formatted in the generic SCADA data format. + cond (iteratible): List or array-like variable with bool entries + depicting whether the condition is met or not. These should be + situations in which you classify the data as faulty. For example, + high wind speeds but low power productions, or NaNs, self-flagged + status variables. + turbine_list (int, list): Turbine identifier(s) for which the data + should be flagged as faulty when the condition is met. 
+ exclude_columns (list, optional): List of columns that should not + be considered for the filtering. Defaults to []. + + Returns: + pd.DataFrame: Dataframe with the faulty measurements marked as None. + """ if isinstance(turbine_list, (np.integer, int)): turbine_list = [turbine_list] @@ -39,7 +73,9 @@ def df_mark_turbdata_as_faulty(df, cond, turbine_list, exclude_columns=[]): class FlascFilter: - """This class allows a user to filter turbine data based on the + """Implement filtering class for SCADA data. + + This class allows a user to filter turbine data based on the wind-speed power curve. This class includes several useful filtering methods: 1. Filtering based on prespecified boxes/windows. Any data outside @@ -56,14 +92,14 @@ def __init__(self, df, turbine_names=None): """Initializes the class. Args: - df ([pd.DataFrame]): Dataframe containing the turbine data, + df (pd.DataFrame): Dataframe containing the turbine data, formatted in the generic SCADA data format. Namely, the dataframe should at the very least contain the columns: * Time of each measurement: time * Wind speed of each turbine: ws_000, ws_001, ... * Power production of each turbine: pow_000, pow_001, ... + turbine_names (list, optional): List of turbine names. Defaults to None. """ - # Write dataframe to self self._df_initial = df.copy() self.reset_filters() @@ -73,7 +109,9 @@ def __init__(self, df, turbine_names=None): # Private methods def _get_all_unique_flags(self): - """Private function that grabs all the unique filter flags + """Returns all unique flags in the dataframe. + + Private function that grabs all the unique filter flags that are available in self.df_filters and returns them as a list of strings. This is helpful when plotting the various filter sources in a scatter plot, for example. 
@@ -108,13 +146,15 @@ def _reset_mean_power_curves(self, ws_bins=np.arange(0.0, 25.5, 0.5)): self.pw_curve_df = pw_curve_df def _get_mean_power_curves(self, df=None, turbine_subset=None): - """Calculates the mean power production in bins of the wind speed, + """Calculates the mean power production in bins of the wind speed. + + Calculates the mean power production in bins of the wind speed, for all turbines in the wind farm. Args: ws_bins ([iteratible], optional): Wind speed bins. Defaults to np.arange(0.0, 25.5, 0.5). - df ([pd.DataFrame]): Dataframe containing the turbine data, + df (pd.DataFrame): Dataframe containing the turbine data, formatted in the generic SCADA data format. Namely, the dataframe should at the very least contain the columns: * Time of each measurement: time @@ -123,12 +163,12 @@ def _get_mean_power_curves(self, df=None, turbine_subset=None): turbine_subset (list, optional): List of turbine indices to calculate the mean power curve for. If None is specified, defaults to calculating it for all turbines. + Returns: - pw_curve_df ([pd.DataFrame]): Dataframe containing the wind + pd.DataFrame: Dataframe containing the wind speed bins and the mean power production value for every turbine. """ - # If df unspecified, use the locally filtered variable if df is None: df = self.df @@ -173,7 +213,6 @@ def _get_mean_power_curves(self, df=None, turbine_subset=None): # Public methods def reset_filters(self): """Reset all filter variables and assume all data is clean.""" - # Copy the original, unfiltered dataframe from self df = self._df_initial self.df = df.reset_index(drop=("time" in df.columns)) @@ -200,7 +239,9 @@ def filter_by_condition( verbose: bool = True, apply_filters_to_df: bool = True, ): - """This is a generic method to filter the dataframe for any particular + """Filter the dataframe for a specific condition, for a specific turbine. 
+ + This is a generic method to filter the dataframe for any particular condition, for a specific turbine or specific set of turbines. This provides a platform for user-specific queries to filter and then inspect the data with. You can call this function multiple times and the filters @@ -230,7 +271,7 @@ def filter_by_condition( high wind speeds but low power productions, or NaNs, self-flagged status variables. label (str): Name or description of the fault/condition that is flagged. - ti (int): Turbine indentifier, typically an integer, but may also be a + ti (int): Turbine identifier, typically an integer, but may also be a list. This flags the measurements of all these turbines as faulty for which condition==True. verbose (bool, optional): Print information to console. Defaults to True. @@ -238,11 +279,10 @@ def filter_by_condition( self.df directly as NaN. Defaults to True. Returns: - df_out: The filtered dataframe. All measurements that are flagged as faulty + pd.Dataframe: The filtered dataframe. All measurements that are flagged as faulty are overwritten by "None"/"NaN". If apply_filters_to_df==True, then this dataframe is equal to the internally filtered dataframe 'self.df'. """ - # Pour it into a list format if isinstance(ti, int): ti = [ti] @@ -294,7 +334,9 @@ def filter_by_sensor_stuck_faults( plot: bool = False, verbose: bool = True, ): - """Filter the turbine measurements for sensor-stuck type of faults. This is + """Filter the turbine measurements for sensor-stuck type of faults. + + This is the situation where a turbine measurement reads the exact same value for multiple consecutive timestamps. This typically indicates a "frozen" sensor rather than a true physical effect. This is particularly the case for @@ -332,12 +374,11 @@ def filter_by_sensor_stuck_faults( verbose (bool, optional): Print information to console. Defaults to True. 
Returns: - self.df: Pandas DataFrame with the filtered data, in which faulty turbine + pd.DataFrame: Pandas DataFrame with the filtered data, in which faulty turbine measurements are flagged as None/NaN. This is an aggregated filtering variable, so it includes faulty-flagged measurements from filter operations in previous steps. """ - # Filter sensor faults using the separate function call stuck_indices = find_sensor_stuck_faults( df=self.df, @@ -375,37 +416,44 @@ def filter_by_power_curve( no_iterations=10, cutoff_ws=20.0, ): - """Filter the data by offset from the mean power curve in x- - directions. This is an iterative process because the estimated mean + """Filter the data by offset from the mean power curve in x-directions. + + This is an iterative process because the estimated mean curve actually changes as data is filtered. This process typically converges within a couple iterations. Args: ti (int): The turbine identifier for which the data should be - filtered. + filtered. m_ws_lb (float, optional): Multiplier on the wind speed defining - the left bound for the power curve. Any data to the left of this - curve is considered faulty. Defaults to 0.95. + the left bound for the power curve. Any data to the left of this + curve is considered faulty. Defaults to 0.95. m_pow_lb (float, optional): Multiplier on the power defining - the left bound for the power curve. Any data to the left of this - curve is considered faulty. Defaults to 1.01. + the left bound for the power curve. Any data to the left of this + curve is considered faulty. Defaults to 1.01. m_ws_rb (float, optional): Multiplier on the wind speed defining - the right bound for the power curve. Any data to the right of this - curve is considered faulty. Defaults to 1.05. + the right bound for the power curve. Any data to the right of this + curve is considered faulty. Defaults to 1.05. m_pow_rb (float, optional): Multiplier on the power defining - the right bound for the power curve.
Any data to the right of this - curve is considered faulty. Defaults to 0.99. + the right bound for the power curve. Any data to the right of this + curve is considered faulty. Defaults to 0.99. + ws_deadband (float, optional): Deadband in [m/s] around the median + power curve around which data is by default classified as valid. + Defaults to 0.50. + pow_deadband (float, optional): Deadband in [kW] around the median + power curve around which data is by default classified as valid. + Defaults to 20.0. no_iterations (int, optional): Number of iterations. The - solution typically converges in 2-3 steps, but as the process is - very fast, it's better to run a higher number of iterations. - Defaults to 10. + solution typically converges in 2-3 steps, but as the process is + very fast, it's better to run a higher number of iterations. + Defaults to 10. cutoff_ws (float, optional): Upper limit for the filtering to occur. - Typically, this is a value just below the cut-out wind speed. Namely, - issues arise if you put this wind speed above the cut-out wind speed, - because we effectively end up with two curves for the same power - production (one at region 2, one going down from cut-out wind speed). - This confuses the algorithm. Hence, suggested to put this somewhere - around 15-25 m/s. Defaults to 20 m/s. + Typically, this is a value just below the cut-out wind speed. Namely, + issues arise if you put this wind speed above the cut-out wind speed, + because we effectively end up with two curves for the same power + production (one at region 2, one going down from cut-out wind speed). + This confuses the algorithm. Hence, suggested to put this somewhere + around 15-25 m/s. Defaults to 20 m/s. """ # Initialize the dataframe from self, as a starting point. 
Note # that in each iteration, we do not want to build upon the @@ -545,36 +593,43 @@ def filter_by_floris_power_curve( pow_deadband=20.0, cutoff_ws=20.0, ): - """Filter the data by offset from the floris power curve in x- - directions. + """Filter the data by offset from the floris power curve. Args: fm (FlorisModel): The FlorisModel object for the farm + ti (int): The turbine identifier for which the data should be + filtered. m_ws_lb (float, optional): Multiplier on the wind speed defining - the left bound for the power curve. Any data to the left of this - curve is considered faulty. Defaults to 0.95. + the left bound for the power curve. Any data to the left of this + curve is considered faulty. Defaults to 0.95. m_pow_lb (float, optional): Multiplier on the power defining - the left bound for the power curve. Any data to the left of this - curve is considered faulty. Defaults to 1.01. + the left bound for the power curve. Any data to the left of this + curve is considered faulty. Defaults to 1.01. m_ws_rb (float, optional): Multiplier on the wind speed defining - the right bound for the power curve. Any data to the right of this - curve is considered faulty. Defaults to 1.05. + the right bound for the power curve. Any data to the right of this + curve is considered faulty. Defaults to 1.05. m_pow_rb (float, optional): Multiplier on the power defining - the right bound for the power curve. Any data to the right of this - curve is considered faulty. Defaults to 0.99. + the right bound for the power curve. Any data to the right of this + curve is considered faulty. Defaults to 0.99. ws_deadband (float, optional): Deadband in [m/s] around the median - power curve around which data is by default classified as valid. - Defaults to 0.50. + power curve around which data is by default classified as valid. + Defaults to 0.50. pow_deadband (float, optional): Deadband in [kW] around the median - power curve around which data is by default classified as valid. 
- Defaults to 20.0. + power curve around which data is by default classified as valid. + Defaults to 20.0. cutoff_ws (float, optional): Wind speed up to which the median - power curve is calculated and the data is filtered for. You should - make sure this variable is set to a value above the rated wind - speed and below the cut-out wind speed. If you are experiencing - problems with data filtering and your data points have a downward - trend near the high wind speeds, try decreasing this variable's - value to 15.0. + power curve is calculated and the data is filtered for. You should + make sure this variable is set to a value above the rated wind + speed and below the cut-out wind speed. If you are experiencing + problems with data filtering and your data points have a downward + trend near the high wind speeds, try decreasing this variable's + value to 15.0. + + Returns: + pd.Dataframe: Pandas DataFrame with the filtered data, in which faulty turbine + measurements are flagged as None/NaN. This is an aggregated filtering + variable, so it includes faulty-flagged measurements from filter + operations in previous steps. """ logger.info("Filtering data by deviations from the floris power curve...") @@ -715,13 +770,9 @@ def get_power_curve(self, calculate_missing=True): calculate_missing (bool, optional): Calculate the median power curves for the turbines for the turbines of which their power curves were previously not yet calculated. + Returns: - pw_curve_df ([pd.DataFrame]): Dataframe containing the wind - speed bins and the mean power production value for every - turbine. - calculate_missing (bool, optional): Calculate the median power - curves for the turbines for the turbines of which their - power curves were previously not yet calculated. + pd.DataFrame: Dataframe containing the estimated mean power curves. 
""" if calculate_missing and (self.pw_curve_df.isna().all(axis=0).any()): turbine_subset = np.where( @@ -734,15 +785,20 @@ def get_power_curve(self, calculate_missing=True): return self.pw_curve_df def plot_farm_mean_power_curve(self, fm=None): - """Plot all turbines' power curves in a single figure. Also estimate + """Plot mean of all turbines' power curves and show individual curves. + + Also estimate and plot a mean turbine power curve. Args: fm (FlorisModel): The FlorisModel object for the farm. If specified by the user, then the farm-average turbine power curve from FLORIS will be plotted on top of the SCADA-based power curves. - """ + Returns: + tuple (fig, ax): The figure and axis objects of the plot. + + """ # Get mean power curves for the turbines that are not yet calculated if self.pw_curve_df.isna().all(axis=0).any(): turbine_subset = np.where( @@ -785,7 +841,9 @@ def plot_farm_mean_power_curve(self, fm=None): def plot_filters_custom_scatter( self, ti, x_col, y_col, xlabel="Wind speed (m/s)", ylabel="Power (kW)", ax=None ): - """Plot the filtered data in a scatter plot, categorized + """Plot the filtered data in a scatter plot. + + Plot the filtered data in a scatter plot, categorized by the source of their filter/fault. This is a generic function that allows the user to plot various numeric variables on the x and y axis. @@ -862,7 +920,9 @@ def plot_filters_custom_scatter_bokeh( ylabel="Power (kW)", p=None, ): - """Plot the filtered data in a scatter plot, categorized + """Plot the filtered data in a scatter plot. + + Plot the filtered data in a scatter plot, categorized by the source of their filter/fault. This is a generic function that allows the user to plot various numeric variables on the x and y axis. 
@@ -951,7 +1011,9 @@ def plot_filters_custom_scatter_bokeh( return p def plot_filters_in_ws_power_curve(self, ti, fm=None, ax=None): - """Plot the wind speed power curve and connect each faulty datapoint + """Plot faulty data in the wind speed power curve. + + Plot the wind speed power curve and connect each faulty datapoint to the label it was classified as faulty with. Args: @@ -960,8 +1022,10 @@ def plot_filters_in_ws_power_curve(self, ti, fm=None, ax=None): use this to plot the turbine power curves as implemented in floris. Defaults to None. ax (plt.Axis): Pyplot Axis object. - """ + Returns: + ax: The figure axis in which the scatter plot is drawn. + """ if ax is None: _, ax = plt.subplots(figsize=(10, 5)) @@ -1017,7 +1081,9 @@ def plot_filters_in_ws_power_curve(self, ti, fm=None, ax=None): return ax def plot_postprocessed_in_ws_power_curve(self, ti, fm=None, ax=None): - """Plot the wind speed power curve and mark faulty data according to + """Plot the postprocessed data in the wind speed power curve. + + Plot the wind speed power curve and mark faulty data according to their filters. Args: @@ -1027,8 +1093,10 @@ def plot_postprocessed_in_ws_power_curve(self, ti, fm=None, ax=None): Defaults to None. ax (Matplotlib.pyplot Axis, optional): Axis to plot in. If None is specified, creates a new figure and axis. Defaults to None. - """ + Returns: + ax: The figure axis in which the scatter plot is drawn. + """ if ax is None: _, ax = plt.subplots(figsize=(10, 5)) @@ -1093,7 +1161,9 @@ def plot_postprocessed_in_ws_power_curve(self, ti, fm=None, ax=None): return ax def plot_filters_in_time(self, ti, ax=None): - """Generate bar plot where each week of data is gathered and its + """Plot the filtered data in time. + + Generate bar plot where each week of data is gathered and its filtering results will be shown relative to the data size of each week. 
This plot can particularly be useful to investigate whether certain weeks/time periods show a particular high number of faulty @@ -1135,7 +1205,9 @@ def plot_filters_in_time(self, ti, ax=None): return ax def plot_filters_in_time_bokeh(self, ti, p=None): - """Generate bar plot where each week of data is gathered and its + """Plot the filtered data in time. + + Generate bar plot where each week of data is gathered and its filtering results will be shown relative to the data size of each week. This plot can particularly be useful to investigate whether certain weeks/time periods show a particular high number of faulty @@ -1147,8 +1219,10 @@ def plot_filters_in_time_bokeh(self, ti, p=None): ti (int): Index of the turbine of interest. p (Bokeh Figure, optional): Figure to plot in. If None is specified, creates a new figure. Defaults to None. - """ + + Returns: + p: The Bokeh figure in which the bar plot is drawn. + """ if p is None: p = figure( title="Filters over time", @@ -1200,19 +1274,21 @@ def plot_filters_in_time_bokeh(self, ti, p=None): def filter_df_by_faulty_impacting_turbines(df, ti, df_impacting_turbines, verbose=True): - """Assigns a turbine's measurement to NaN for each timestamp for which any of the turbines + """Assign faulty measurements based on upstream turbine faults. + + Assigns a turbine's measurement to NaN for each timestamp for which any of the turbines that are shedding a wake on this turbine is reporting NaN measurements. Args: df (pd.DataFrame): Dataframe with SCADA data with measurements - formatted according to wd_000, wd_001, wd_002, pow_000, pow_001, - pow_002, and so on. - ti (integer): Turbine number for which we are filtering the data. - Basically, each turbine that impacts that power production of - turbine 'ti' by more than 0.1% is required to be reporting a - non-faulty measurement. If not, we classify the measurement of - turbine 'ti' as faulty because we cannot sufficiently know the - inflow conditions of this turbine.
+ formatted according to wd_000, wd_001, wd_002, pow_000, pow_001, + pow_002, and so on. + ti (int): Turbine number for which we are filtering the data. + Basically, each turbine that impacts that power production of + turbine 'ti' by more than 0.1% is required to be reporting a + non-faulty measurement. If not, we classify the measurement of + turbine 'ti' as faulty because we cannot sufficiently know the + inflow conditions of this turbine. df_impacting_turbines (pd.DataFrame): A Pandas DataFrame in the format of: @@ -1238,7 +1314,6 @@ def filter_df_by_faulty_impacting_turbines(df, ti, df_impacting_turbines, verbos pd.DataFrame: The postprocessed dataframe for 'df', filtered for inter-turbine issues like curtailment and turbine downtime. """ - # Get number of turbines n_turbines = dfm.get_num_turbines(df) diff --git a/flasc/data_processing/find_sensor_faults.py b/flasc/data_processing/find_sensor_faults.py index 957de948..be2050d8 100644 --- a/flasc/data_processing/find_sensor_faults.py +++ b/flasc/data_processing/find_sensor_faults.py @@ -1,3 +1,5 @@ +"""Module for finding sensor-stuck faults in a dataframe.""" + import os import matplotlib.pyplot as plt @@ -19,6 +21,25 @@ def find_sensor_stuck_faults( verbose=False, return_by_column=False, ): + """Find sensor-stuck faults in a dataframe. + + Args: + df (pd.DataFrame): The dataframe containing the data. + columns (list): The columns to check for sensor-stuck faults. + ti (Any): unused + stddev_threshold (float, optional): The threshold for the standard deviation of the + consecutive measurements. Defaults to 0.001. + n_consecutive_measurements (int, optional): The number of consecutive measurements to + compare. Defaults to 3. + plot_figures (bool, optional): Whether to plot figures for the sensor-stuck faults. + Defaults to True. + verbose (bool, optional): Whether to print verbose output. Defaults to False. + return_by_column (bool, optional): Whether to return the faults by column. + Defaults to False. 
+ + Returns: + np.array: The indices of the sensor-stuck faults + """ # Settings which indicate a sensor-stuck type of fault: the standard # deviation between the [no_consecutive_measurements] number of # consecutive measurements is less than [stddev_threshold]. diff --git a/flasc/data_processing/northing_offset.py b/flasc/data_processing/northing_offset.py index 10f32b14..b07a2a98 100644 --- a/flasc/data_processing/northing_offset.py +++ b/flasc/data_processing/northing_offset.py @@ -1,3 +1,5 @@ +"""Module for cross-checking the consistency of the northing offset between turbines.""" + from datetime import timedelta as td import matplotlib.pyplot as plt @@ -12,9 +14,23 @@ logger = logger_manager.logger # Obtain the reusable logger +# TODO: This one is used but is it all fully up to date? def crosscheck_northing_offset_consistency( df, fm, bias_timestep=td(days=120), nan_thrshld=0.50, plot_figure=True ): + """Cross-check the consistency of the northing offset between turbines. + + Args: + df (pd.DataFrame): DataFrame containing the relevant data. + fm (floris.simulation.Floris): Floris object. + bias_timestep (timedelta, optional): Time step for bias calculation. + Defaults to td(days=120). + nan_thrshld (float, optional): Threshold for NaN values. Defaults to 0.50. + plot_figure (bool, optional): Whether to plot the figure. Defaults to True. 
+ + Returns: + list: List of strings indicating the status of each turbine + """ # Load data and extract info num_turbines = len(fm.layout_x) turbine_list = range(num_turbines) diff --git a/flasc/data_processing/time_operations.py b/flasc/data_processing/time_operations.py index da6756e2..e619c882 100644 --- a/flasc/data_processing/time_operations.py +++ b/flasc/data_processing/time_operations.py @@ -1,3 +1,5 @@ +"""Time operations for data processing.""" + from datetime import timedelta as td import numpy as np @@ -19,12 +21,25 @@ def df_movingaverage( center=True, calc_median_min_max_std=False, ): - """ + """Compute a moving average of a dataframe with angular columns. + Note that median, minimum, and maximum do not handle angular quantities and should be treated carefully. Standard deviation handles angular quantities. - """ + Args: + df_in (pd.DataFrame): Input dataframe. + cols_angular (list): List of angular columns. + window_width (timedelta): Width of the moving average window. + min_periods (int): Minimum number of periods to consider. + center (bool): Center the time index. Default is True. + calc_median_min_max_std (bool): Calculate median, min, max, and std. + Default is False. + + Returns: + pd.DataFrame: Output dataframe with moving averages. + + """ df = df_in.set_index("time").copy() # Find non-angular columns @@ -103,6 +118,25 @@ def df_downsample( calc_median_min_max_std=False, return_index_mapping=False, ): + """Downsample a dataframe to a average accounting for angular columns. + + Args: + df_in (pd.DataFrame): Input dataframe. + cols_angular (list): List of angular columns. + window_width (timedelta): Width of the average window. + min_periods (int): Minimum number of data points for a bin to be valid. + center (bool): Center the time index. Default is False. + calc_median_min_max_std (bool): Calculate median, min, max, and std. + Default is False. + return_index_mapping (bool): Return index mapping. Default is False. 
+ + Returns: + A tuple (pd.DataFrame, np.ndarray) if return_index_mapping is True. Where + the DataFrame is the downsampled dataframe and the np.ndarray is the + index mapping. + A pd.DataFrame if return_index_mapping is False. + + """ # Copy and ensure dataframe is indexed by time df = df_in.copy() if "time" not in df.columns: @@ -308,6 +342,21 @@ def get_last_index(x): def df_resample_by_interpolation( df, time_array, circular_cols, interp_method="linear", max_gap=None, verbose=True ): + """Resample a dataframe by interpolation onto a new time array. + + Args: + df (pd.DataFrame): Input dataframe. + time_array (np.array): New time array. + circular_cols (list): List of columns that are circular. + interp_method (str): Interpolation method. Default is "linear". + max_gap (float): Maximum gap for interpolation. Default is None. + If None, will be set to 1.5 times the median timestep. + verbose (bool): Print information. Default is True. + + Returns: + pd.DataFrame: Resampled dataframe + + """ # Copy with properties but no actual data df_res = df.head(0).copy() @@ -371,6 +420,15 @@ def df_resample_by_interpolation( # Function from "EFFECTIVE PANDAS" for flattening multi-level column names def flatten_cols(df): + """Flatten multi-level columns in a DataFrame. + + Args: + df (pd.DataFrame): Input DataFrame. + + Returns: + pd.DataFrame: Flattened DataFrame. + + """ cols = ["_".join(map(str, vals)) for vals in df.columns.to_flat_index()] df.columns = cols return df diff --git a/flasc/logging_manager.py b/flasc/logging_manager.py index 560a3e1f..71da7595 100644 --- a/flasc/logging_manager.py +++ b/flasc/logging_manager.py @@ -1,3 +1,10 @@ +"""This module provides a class for managing logging in FLASC. + +The LoggingManager class provides a simple interface for configuring the root +logger for the FLASC package. 
+ +""" + import logging from datetime import datetime @@ -5,12 +12,30 @@ class TracebackInfoFilter(logging.Filter): - """Clear or restore the exception on log records""" + """Filters exception information from log records. + + This filter can be used to either clear or restore exception traceback + information from log records. + """ def __init__(self, clear=True): + """Initialize the filter. + + Args: + clear (bool, optional): If True, clear the stack info. If False, restore it. + Defaults to True. + """ self.clear = clear def filter(self, record): + """Filter the log record to clear or restore the stack info. + + Args: + record (logging.LogRecord): The log record to filter. + + Returns: + bool: True if the record should be logged, False otherwise. + """ if self.clear: record._stack_info_hidden, record.stack_info = record.stack_info, None elif hasattr(record, "_stack_info_hidden"): @@ -20,9 +45,7 @@ def filter(self, record): class LoggingManager: - """ - This class provides easy access to a configured logger. - """ + """This class provides easy access to a configured logger.""" def __init__( self, @@ -32,6 +55,16 @@ def __init__( file_level="INFO", console_timestamp=True, ): + """Initialize the LoggingManager. + + Args: + log_to_console (bool, optional): If True, log to the console. Defaults to True. + console_level (str, optional): The logging level for the console. Defaults to "INFO". + log_to_file (bool, optional): If True, log to a file. Defaults to False. + file_level (str, optional): The logging level for the file. Defaults to "INFO". + console_timestamp (bool, optional): If True, include a timestamp in console logs. + Defaults to True. + """ self.log_to_console = log_to_console self.console_level = console_level self.log_to_file = log_to_file @@ -40,8 +73,8 @@ def __init__( self._setup_logger() def _setup_logger(self): - """ - Configures the root logger based on the default or user-specified settings. 
+ """Configures the root logger based on the default or user-specified settings. + As needed, a StreamHandler is created for console logging or FileHandler is created for file logging. Both can be attached to the root logger for use throughout FLASC. @@ -84,5 +117,10 @@ def _setup_logger(self): @property def logger(self): + """Get the logger for the class. + + Returns: + logging.Logger: The logger for the class. + """ caller_name = f"{type(self).__module__}.{type(self).__name__}" return logging.getLogger(caller_name) diff --git a/flasc/model_fitting/floris_sensitivity_analysis.py b/flasc/model_fitting/floris_sensitivity_analysis.py index 006733bc..010bea86 100644 --- a/flasc/model_fitting/floris_sensitivity_analysis.py +++ b/flasc/model_fitting/floris_sensitivity_analysis.py @@ -1,3 +1,6 @@ +"""_summary_.""" + + import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -13,7 +16,16 @@ class floris_sobol_analysis: + """_summary_.""" + def __init__(self, fi, problem, calc_second_order=False): + """_summary_. + + Args: + fi (_type_): _description_ + problem (_type_): _description_ + calc_second_order (bool, optional): _description_. Defaults to False. + """ self.fm = fi # Default parameters @@ -91,6 +103,13 @@ def _create_evals_dataframe(self): # Step 1: generating samples for a particular problem def generate_samples(self, N, problem=None, calc_second_order=None): + """_summary_. + + Args: + N (_type_): _description_ + problem (_type_, optional): _description_. Defaults to None. + calc_second_order (_type_, optional): _description_. Defaults to None. + """ if problem is None: problem = self.problem @@ -107,6 +126,17 @@ def generate_samples(self, N, problem=None, calc_second_order=None): self._create_evals_dataframe() def calculate_wfpower_for_samples(self, num_threads=1): + """_summary_. + + Args: + num_threads (int, optional): _description_. Defaults to 1. 
+ + Raises: + DataError: _description_ + + Returns: + _type_: _description_ + """ if self.samples_x is None: raise DataError("Please run generate_samples first.") @@ -124,6 +154,14 @@ def calculate_wfpower_for_samples(self, num_threads=1): return self.samples_y def get_sobol_sensitivity_indices(self, verbose=False): + """_summary_. + + Args: + verbose (bool, optional): _description_. Defaults to False. + + Returns: + _type_: _description_ + """ self.Si = sobol.analyze( self.problem, self.samples_y, @@ -133,6 +171,19 @@ def get_sobol_sensitivity_indices(self, verbose=False): return self.Si def plot_sobol_results(self, save_path=None, fig_format="png", fig_dpi=200): + """_summary_. + + Args: + save_path (_type_, optional): _description_. Defaults to None. + fig_format (str, optional): _description_. Defaults to "png". + fig_dpi (int, optional): _description_. Defaults to 200. + + Raises: + DataError: _description_ + + Returns: + _type_: _description_ + """ if self.Si is None: raise DataError( "No Sobol results to show. " + "Have you run get_sobol_sensitivity_indices()?" @@ -232,6 +283,16 @@ def plot_sobol_results(self, save_path=None, fig_format="png", fig_dpi=200): return fig, ax def plot_convergence(self, save_path=None, fig_format="png", fig_dpi=200): + """_summary_. + + Args: + save_path (_type_, optional): _description_. Defaults to None. + fig_format (str, optional): _description_. Defaults to "png". + fig_dpi (int, optional): _description_. Defaults to 200. + + Returns: + _type_: _description_ + """ logger.info("Analyzing convergence...") # Create copies of original results diff --git a/flasc/model_fitting/floris_tuning.py b/flasc/model_fitting/floris_tuning.py index 7a33a0bc..eba394bf 100644 --- a/flasc/model_fitting/floris_tuning.py +++ b/flasc/model_fitting/floris_tuning.py @@ -1,15 +1,4 @@ -# Copyright 2023 NREL -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. 
You may obtain a copy of -# the License at http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# See https://nrel.github.io/flasc/ for documentation +"""Module for tuning FLORIS to SCADA data.""" # This is a preliminary implementation of tuning methods for FLORIS to SCADA. # The code is focused on methods for the Empirical Guassian wake model and is @@ -30,18 +19,16 @@ def evaluate_overall_wake_loss(df_, df_freq=None): - """ - Evaluate the overall wake loss from pow_ref to pow_test as percent reductions + """Evaluate the overall wake loss from pow_ref to pow_test as percent reductions. Args: - df_ (DataFrame): Polars dataframe possibly containing Null values + df_ (pl.DataFrame): Polars dataframe possibly containing Null values df_freq (Dataframe): Not yet used Returns: float: Overall wake losses """ - # Not sure yet if we want to figure out how to use df_freq here return 100 * (df_["pow_ref"].sum() - df_["pow_test"].sum()) / df_["pow_ref"].sum() @@ -62,11 +49,29 @@ def sweep_velocity_model_parameter_for_overall_wake_losses( ws_max=50.0, df_freq=None, # Not yet certain we will use this ): - """ - Sweep the parameter in FLORIS using the values in value_candidates, and compare to - SCADA data in df_scada_in using the overall_wake_loss - """ + """Sweep the parameter in FLORIS using the values in value_candidates. 
+ + Compare to SCADA data in df_scada_in using the overall_wake_loss + Args: + parameter (str): The parameter to sweep + value_candidates (list): The values to sweep + df_scada_in (DataFrame): The SCADA data + fm_in (floris.tools.Floris): The FLORIS model + ref_turbines (list): The reference turbines + test_turbines (list): The test turbines + param_idx (int): The parameter index + yaw_angles (np.ndarray): The yaw angles + wd_min (float): The minimum wind direction + wd_max (float): The maximum wind direction + ws_min (float): The minimum wind speed + ws_max (float): The maximum wind speed + df_freq (DataFrame): The frequency data + + Returns: + A tuple (np.ndarray, np.ndarray) where the first element is the FLORIS wake losses + and the second element is the SCADA wake losses + """ # Currently assuming pow_ref and pow_test already assigned # Also assuming limit to ws/wd range accomplished but could revisit? @@ -121,11 +126,18 @@ def sweep_velocity_model_parameter_for_overall_wake_losses( def select_best_wake_model_parameter(floris_results, scada_results, value_candidates, ax=None): - """ - Consider the provided velocity parameters and determine the best fit with - respect to squared error - """ + """Determine the best fit with respect to squared error. + + Args: + floris_results (np.ndarray): The FLORIS wake losses + scada_results (np.ndarray): The SCADA wake losses + value_candidates (np.ndarray): The parameter values + ax (Axes): The axes to plot on. If None, no plot is made. + Default is None. + Returns: + float: best fit parameter value + """ error_values = (floris_results - scada_results) ** 2 best_param = value_candidates[np.argmin(error_values)] @@ -159,12 +171,34 @@ def sweep_wd_std_for_er( df_freq=None, # Not yet certain we will use this, remove_all_nulls=False, ): - """ - Determine the best-fit wd_std for FLORIS by comparison with energy ratio plots + """Determine the best-fit wd_std for FLORIS by comparison with energy ratio plots. 
TODO: Reimplement that comparison only takes place when FLORIS value is below some threshold - """ + Args: + value_candidates (list): The values to sweep + df_scada_in (DataFrame): The SCADA data + df_approx_ (DataFrame): The FLORIS approximation data + ref_turbines (list): The reference turbines + test_turbines (list): The test turbines + yaw_angles (np.ndarray): The yaw angles + wd_step (float): The wind direction step + wd_min (float): The minimum wind direction + wd_max (float): The maximum wind direction + ws_step (float): The wind speed step + ws_min (float): The minimum wind speed + ws_max (float): The maximum wind speed + bin_cols_in (list): The bin columns + weight_by (str): The weight method. Can be 'min' or 'sum'. + Default is 'min'. + df_freq (DataFrame): The frequency data + remove_all_nulls (bool): Remove all nulls. Default is False. + + Returns: + A tuple (np.ndarray, list): The first element is the FLORIS energy ratio errors + and the second element is the dataframes. + + """ # Currently assuming pow_ref and pow_test already assigned # Also assuming limit to ws/wd range accomplished but could revisit? @@ -245,11 +279,17 @@ def sweep_wd_std_for_er( def select_best_wd_std(er_results, value_candidates, ax=None): - """ - Consider the provided wd_std and determine the best fit with - respect to squared error - """ + """Consider wd_std and determine the best fit with respect to squared error. + + Args: + er_results (np.ndarray): The energy ratio errors + value_candidates (np.ndarray): The parameter values + ax (Axes): The axes to plot on. If None, no plot is made. + Default is None. 
+    Returns:
+        float: The best parameter value
+    """
     error_sq = er_results**2
     best_param = value_candidates[np.argmin(error_sq)]
@@ -286,11 +326,36 @@ def sweep_deflection_parameter_for_total_uplift(
     df_freq=None,  # Not yet certain we will use this,
     remove_all_nulls=False,
 ):
-    """
-    Sweep values of the deflection parameter in FLORIS and compare to SCADA
-    data with respect to overall uplift
-    """
+    """Sweep values of the deflection parameter in FLORIS and compare to SCADA.
+
+    Comparison is made wrt overall uplift.
+    Args:
+        parameter (str): The parameter to sweep
+        value_candidates (list): The values to sweep
+        df_scada_baseline_in (DataFrame): The baseline SCADA data
+        df_scada_wakesteering_in (DataFrame): The wake steering SCADA data
+        fm_in (floris.tools.Floris): The FLORIS model
+        ref_turbines (list): The reference turbines
+        test_turbines (list): The test turbines
+        yaw_angles_baseline (np.ndarray): The yaw angles for the baseline
+        yaw_angles_wakesteering (np.ndarray): The yaw angles for the wake steering
+        wd_step (float): The wind direction step
+        wd_min (float): The minimum wind direction
+        wd_max (float): The maximum wind direction
+        ws_step (float): The wind speed step
+        ws_min (float): The minimum wind speed
+        ws_max (float): The maximum wind speed
+        bin_cols_in (list): The bin columns
+        weight_by (str): The weight method. Can be 'min' or 'sum'.
+            Default is 'min'.
+        df_freq (DataFrame): The frequency data
+        remove_all_nulls (bool): Remove all nulls. Default is False.
+
+    Returns:
+        A tuple (np.ndarray, np.ndarray) where the first element is the FLORIS total uplifts
+        and the second element is the SCADA total uplifts
+    """
     # Currently assuming pow_ref and pow_test already assigned
     # Also assuming limit to ws/wd range accomplished but could revisit?
diff --git a/flasc/model_fitting/turbulence_estimator.py b/flasc/model_fitting/turbulence_estimator.py index 01afbfc0..fe62e026 100644 --- a/flasc/model_fitting/turbulence_estimator.py +++ b/flasc/model_fitting/turbulence_estimator.py @@ -1,3 +1,5 @@ +"""_summary_.""" + import floris as wfct import matplotlib.pyplot as plt import numpy as np @@ -10,8 +12,16 @@ logger = logger_manager.logger # Obtain the reusable logger +# TODO IS THIS USED ANYWHERE? class ti_estimator: + """_summary_.""" + def __init__(self, fm): + """_summary_. + + Args: + fm (_type_): _description_ + """ self.fm = fm self.num_turbs = len(fm.layout_x) @@ -25,6 +35,11 @@ def _reset_outputs(self): self.P_measured = None def set_measurements(self, P_measured): + """_summary_. + + Args: + P_measured (_type_): _description_ + """ if isinstance(P_measured, int) | isinstance(P_measured, float): P_measured = [P_measured] if isinstance(P_measured, list): @@ -34,6 +49,11 @@ def set_measurements(self, P_measured): self.P_measured = P_measured def get_turbine_order(self): + """_summary_. + + Returns: + _type_: _description_ + """ wd = (180 - self.fm.core.farm.wind_direction[0]) * np.pi / 180.0 rotz = np.matrix([[np.cos(wd), -np.sin(wd), 0], [np.sin(wd), np.cos(wd), 0], [0, 0, 1]]) x0 = np.mean(self.fm.layout_x) @@ -55,6 +75,14 @@ def get_turbine_order(self): return turbine_list_ordered def get_turbine_pairs(self, wake_loss_thrs=0.20): + """_summary_. + + Args: + wake_loss_thrs (float, optional): _description_. Defaults to 0.20. + + Returns: + _type_: _description_ + """ fm = self.fi fm.run() power_baseline = np.array(fm.get_turbine_power()) @@ -85,6 +113,11 @@ def get_turbine_pairs(self, wake_loss_thrs=0.20): return df_pairs def plot_flowfield(self): + """_summary_. 
+ + Returns: + _type_: _description_ + """ self.fm.run() fig, ax = plt.subplots() hor_plane = self.fm.get_hor_plane() @@ -92,6 +125,13 @@ def plot_flowfield(self): return fig, ax, hor_plane def floris_set_ws_wd_ti(self, wd=None, ws=None, ti=None): + """_summary_. + + Args: + wd (_type_, optional): _description_. Defaults to None. + ws (_type_, optional): _description_. Defaults to None. + ti (_type_, optional): _description_. Defaults to None. + """ self.fm = ftools._fi_set_ws_wd_ti(self.fi, wd=wd, ws=ws, ti=ti) def _check_measurements(self): @@ -104,6 +144,17 @@ def _check_measurements(self): def estimate_farmaveraged_ti( self, Ns=50, bounds=(0.01, 0.50), refine_with_fmin=False, verbose=False ): + """_summary_. + + Args: + Ns (int, optional): _description_. Defaults to 50. + bounds (tuple, optional): _description_. Defaults to (0.01, 0.50). + refine_with_fmin (bool, optional): _description_. Defaults to False. + verbose (bool, optional): _description_. Defaults to False. + + Returns: + _type_: _description_ + """ self._check_measurements() out = opt.estimate_ti( fi=self.fi, @@ -124,6 +175,17 @@ def estimate_farmaveraged_ti( return ti_opt def estimate_local_tis(self, Ns=50, bounds=(0.01, 0.50), refine_with_fmin=False, verbose=False): + """_summary_. + + Args: + Ns (int, optional): _description_. Defaults to 50. + bounds (tuple, optional): _description_. Defaults to (0.01, 0.50). + refine_with_fmin (bool, optional): _description_. Defaults to False. + verbose (bool, optional): _description_. Defaults to False. 
+ + Returns: + _type_: _description_ + """ self._check_measurements() turbines_sorted = self.turbine_list_ordered df_turbine_pairs = self.turbine_pairs @@ -157,6 +219,7 @@ def estimate_local_tis(self, Ns=50, bounds=(0.01, 0.50), refine_with_fmin=False, return out_array def plot_cost_function_farm(self): + """_summary_.""" fig, ax = plt.subplots() ax.plot(self.opt_farm["x"], self.opt_farm["J"]) ax.plot(self.opt_farm["x_opt"], self.opt_farm["J_opt"], "ro") @@ -166,6 +229,7 @@ def plot_cost_function_farm(self): ax.set_title("Farm-wide turbulence intensity estimation: cost function J") def plot_cost_functions_turbines(self): + """_summary_.""" for ti in range(self.num_turbs): fig, ax = plt.subplots() ax.plot(self.opt_turbines[ti]["x"], self.opt_turbines[ti]["J"]) @@ -176,8 +240,14 @@ def plot_cost_functions_turbines(self): ax.set_title("Turbulence intensity estimation for turbine %03d: cost function J" % ti) def plot_power_bars(self): + """_summary_. + + Returns: + _type_: _description_ + """ fm = self.fi fm.run() + fig, ax = plt.subplots() ax.bar(x=np.array(range(self.num_turbs)) - 0.15, height=fm.get_turbine_power(), width=0.3) ax.bar(x=np.array(range(self.num_turbs)) + 0.15, height=self.P_measured, width=0.3) diff --git a/flasc/model_fitting/yaw_pow_fitting.py b/flasc/model_fitting/yaw_pow_fitting.py index aaa13f4d..d97579c1 100644 --- a/flasc/model_fitting/yaw_pow_fitting.py +++ b/flasc/model_fitting/yaw_pow_fitting.py @@ -1,3 +1,4 @@ +"""Module for fitting yaw loss power curve.""" import os import matplotlib.pyplot as plt @@ -11,8 +12,18 @@ logger = logger_manager.logger # Obtain the reusable logger +# TODO: Is this class still used anywhere? class yaw_pow_fitting: + """Class for fitting yaw loss power curve.""" + def __init__(self, df, df_upstream=None, ti=0): # , turbine_list='all'): + """Initialize the yaw power curve fitting object. + + Args: + df (pd.DataFrame): DataFrame containing the relevant data. 
+ df_upstream (pd.DataFrame): DataFrame containing the upstream conditions. + ti (int): Index of the turbine to fit the yaw power curve to. + """ logger.info("Initializing yaw power curve filtering object.") # Assign dataframes to self # self.df_upstream = df_upstream @@ -22,6 +33,13 @@ def __init__(self, df, df_upstream=None, ti=0): # , turbine_list='all'): # self.set_turbine_mode(turbine_list) def set_df(self, df, df_upstream, ti): + """Set the dataframe for the yaw power curve fitting object. + + Args: + df (pd.DataFrame): DataFrame containing the relevant data. + df_upstream (pd.DataFrame): DataFrame containing the upstream conditions. + ti (int): Index of the turbine to fit the yaw power curve to. + """ # if 'vane_000' not in df.columns: # raise KeyError('vane_000 not found in dataset.') @@ -69,6 +87,14 @@ def set_df(self, df, df_upstream, ti): # self.num_turbines = len(turbine_list) def calculate_curves(self, vane_bounds=(-15.0, 15.0), dv=1.0, Pmin=10.0): + """Calculate the yaw-power curve. + + Args: + vane_bounds (tuple): Tuple containing the lower and upper bounds of the vane angle. + Default is (-15.0, 15.0). + dv (float): Bin width for the vane angle. Default is 1.0. + Pmin (float): Minimum power value to consider. Default is 10.0. + """ df = self.df # df_upstream = self.df_upstream # turbine_list = self.turbine_list @@ -117,6 +143,21 @@ def estimate_cos_pp_fit( opt_pp_range=(1.0, 10.0), opt_Ns=41, ): + """Estimate the best fit for a cos(x-x0)^pp curve. + + Args: + opt_yshift_range (tuple): Tuple containing the lower and upper bounds for the y shift. + Default is None. + opt_bias_range (tuple): Tuple containing the lower and upper bounds for the bias. + Default is (-15.0, 15.0). + opt_pp_range (tuple): Tuple containing the lower and upper bounds for the power. + Default is (1.0, 10.0). + opt_Ns (int): Number of samples to use for the optimization. Default is 41. + + Returns: + x_opt (np.array): Optimal parameters for the cos(x-x0)^pp curve. 
+ Where x[0] is the y shift, x[1] is the bias, and x[2] is the exponent pp. + """ # for ti in self.turbine_list: bins_x = self.bins_x bins_y = self.bins_y @@ -163,6 +204,15 @@ def cost(x): return x_opt def plot(self, save_path=None, fig_dpi=250): + """Plot the yaw-power curve. + + Args: + save_path (str): Path to save the figure. Default is None. + fig_dpi (int): DPI of the figure. Default is 250. + + Returns: + A tuple (matplotlib.figure.Figure, matplotlib.axes.Axes) containing the figure and axes. + """ # for ti in self.turbine_list: bins_x = self.bins_x bins_y = self.bins_y diff --git a/flasc/utilities/circular_statistics.py b/flasc/utilities/circular_statistics.py index 8ec52f8b..7789f921 100644 --- a/flasc/utilities/circular_statistics.py +++ b/flasc/utilities/circular_statistics.py @@ -1,63 +1,51 @@ +"""Circular statistics utility functions.""" + import numpy as np from floris.utilities import wrap_360 from scipy.stats import circmean def calc_wd_mean_radial(angles_array_deg, axis=0, nan_policy="omit"): - """ - Compute the mean wind direction over a given axis. Assumes that the + """Compute the mean wind direction over a given axis. + + Assumes that the input angles are specified in degrees, and returns the mean wind direction in degrees. Wrapper for scipy.stats.circmean - Inputs: - angles_array_deg - numpy array or pandas dataframe of input - wind directions. - axis - axis of array/dataframe to take average over - nan_policy - option to pass to scipy.stats.circmean; defaults to - 'omit'. (Options: 'propagate', 'raise', 'omit') + Args: + angles_array_deg (numpy array): Array of angles in degrees + axis (int): Axis along which to calculate the mean + Default is 0 + nan_policy (str): How to handle NaN values. 
Default is 'omit' - Outputs: - mean_wd - numpy array of mean wind directions over the provided - axis + Returns: + np.array: Mean wind direction in degrees """ - return circmean(angles_array_deg, high=360.0, axis=axis, nan_policy=nan_policy) -# def calc_wd_mean_radial_list(angles_array_list): -# if isinstance(angles_array_list, (pd.DataFrame, pd.Series)): -# array = np.array(angles_array_list) -# elif isinstance(angles_array_list, list): -# array = np.vstack(angles_array_list).T -# else: -# array = np.array(angles_array_list) - -# # Use unit vectors to calculate the mean -# dir_x = np.cos(array * np.pi / 180.).sum(axis=1) -# dir_y = np.sin(array * np.pi / 180.).sum(axis=1) - -# mean_dirs = np.arctan2(dir_y, dir_x) -# mean_out = wrap_360(mean_dirs * 180. / np.pi) - -# return mean_out - - def calculate_wd_statistics(angles_array_deg, axis=0, calc_median_min_max_std=True): """Determine statistical properties of an array of wind directions. + This includes the mean of the array, the median, the standard deviation, the minimum value and the maximum value. 
Args: - angles_array_deg ([float/int]): Array of angles in degrees + angles_array_deg (numpy array): Array of wind directions in degrees + axis (int): Axis along which to calculate the statistics + Default is 0 + calc_median_min_max_std (bool): Whether to calculate the median, minimum, + maximum, and standard deviation of the wind directions + Default is True Returns: - mean_wd (float): Mean wind direction in [0, 360] deg - median_wd (float): Median wind direction in [0, 360] deg - std_wd (float): Standard deviation in deg - min_wd (float): Minimum wind direction in [0, 360] deg - max_wd (float): Maximum wind direction in [0, 360] deg + A tuple containing the following values: + mean_wd (float): Mean wind direction in [0, 360] deg + median_wd (float): Median wind direction in [0, 360] deg + std_wd (float): Standard deviation in deg + min_wd (float): Minimum wind direction in [0, 360] deg + max_wd (float): Maximum wind direction in [0, 360] deg """ - # Preprocessing angles_array_deg = np.array(angles_array_deg, dtype=float) angles_array_deg = wrap_360(angles_array_deg) diff --git a/flasc/utilities/energy_ratio_utilities.py b/flasc/utilities/energy_ratio_utilities.py index 0d35ee8a..f23f01a9 100644 --- a/flasc/utilities/energy_ratio_utilities.py +++ b/flasc/utilities/energy_ratio_utilities.py @@ -1,3 +1,5 @@ +"""Utility functions for calculating energy ratios.""" + import warnings from typing import List, Optional, Union @@ -10,15 +12,14 @@ def cut( col_name: str, edges: Union[np.ndarray, list], ) -> pl.Expr: - """ - Bins the values in the specified column according to the given edges. + """Bins the values in the specified column according to the given edges. - Parameters: - col_name (str): The name of the column to bin. - edges (array-like): The edges of the bins. Values will be placed into the bin - whose left edge is the largest edge less than or equal to - the value, and whose right edge is the smallest edge - greater than the value. 
+ Args: + col_name (str): The name of the column to bin. + edges (array-like): The edges of the bins. Values will be placed into the bin + whose left edge is the largest edge less than or equal to + the value, and whose right edge is the smallest edge + greater than the value. Returns: expression: An expression object that can be used to bin the column. @@ -39,20 +40,19 @@ def bin_column( bin_col_name: str, edges: Union[np.ndarray, list], ) -> pl.DataFrame: - """ - Bins the values in the specified column of a Polars DataFrame according to the given edges. + """Bins the values in the specified column of a Polars DataFrame according to the given edges. - Parameters: - df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. - col_name (str): The name of the column to bin. - bin_col_name (str): The name to give the new column containing the bin labels. - edges (array-like): The edges of the bins. Values will be placed into the bin - whose left edge is the largest edge less than or equal to - the value, and whose right edge is the smallest edge - greater than the value. + Args: + df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. + col_name (str): The name of the column to bin. + bin_col_name (str): The name to give the new column containing the bin labels. + edges (array-like): The edges of the bins. Values will be placed into the bin + whose left edge is the largest edge less than or equal to + the value, and whose right edge is the smallest edge + greater than the value. Returns: - pl.DataFrame: A new Polars DataFrame with an additional column containing the bin labels. + pl.DataFrame: A new Polars DataFrame with an additional column containing the bin labels. 
""" return df_.with_columns( cut(col_name=col_name, edges=edges).alias(bin_col_name).cast(df_[col_name].dtype) @@ -60,19 +60,16 @@ def bin_column( def add_ws(df_: pl.DataFrame, ws_cols: List[str], remove_all_nulls: bool = False) -> pl.DataFrame: - """ - Add the ws column to a dataframe, given which columns to average over + """Add the ws column to a dataframe, given which columns to average over. - - Parameters: - df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. - ws_cols (list(str)): The name of the columns to average across. - remove_all_nulls: (bool): Remove all null values in ws_cols (rather than any) + Args: + df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. + ws_cols (list(str)): The name of the columns to average across. + remove_all_nulls: (bool): Remove all null values in ws_cols (rather than any) Returns: - pl.DataFrame: A new Polars DataFrame with an additional ws column + pl.DataFrame: A new Polars DataFrame with an additional ws column """ - df_with_mean_ws = ( # df_.select(pl.exclude('ws_bin')) # In case ws_bin already exists df_.with_columns( @@ -99,27 +96,26 @@ def add_ws_bin( edges: Optional[Union[np.ndarray, list]] = None, remove_all_nulls: bool = False, ) -> pl.DataFrame: - """ - Add the ws_bin column to a dataframe, given which columns to average over - and the step sizes to use + """Add the ws_bin column to a dataframe. - Parameters: - df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. - ws_cols (list(str)): The name of the columns to average across. - ws_step (float): Step size for binning - ws_min (float): Minimum wind speed - ws_max (float): Maximum wind speed - edges (array-like): The edges of the bins. Values will be placed into the bin - whose left edge is the largest edge less than or equal to - the value, and whose right edge is the smallest edge - greater than the value. Defaults to None, in which case - the edges are generated using ws_step, ws_min, and ws_max. 
- remove_all_nulls: (bool): Remove all null values in ws_cols (rather than any) + Given which columns to average over and the step sizes to use + + Args: + df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. + ws_cols (list(str)): The name of the columns to average across. + ws_step (float): Step size for binning + ws_min (float): Minimum wind speed + ws_max (float): Maximum wind speed + edges (array-like): The edges of the bins. Values will be placed into the bin + whose left edge is the largest edge less than or equal to + the value, and whose right edge is the smallest edge + greater than the value. Defaults to None, in which case + the edges are generated using ws_step, ws_min, and ws_max. + remove_all_nulls: (bool): Remove all null values in ws_cols (rather than any) Returns: - pl.DataFrame: A new Polars DataFrame with an additional ws_bin column + pl.DataFrame: A new Polars DataFrame with an additional ws_bin column """ - if edges is None: edges = np.arange(ws_min, ws_max + ws_step, ws_step) @@ -139,19 +135,16 @@ def add_ws_bin( def add_wd(df_: pl.DataFrame, wd_cols: List[str], remove_all_nulls: bool = False) -> pl.DataFrame: - """ - Add the wd column to a dataframe, given which columns to average over - + """Add the wd column to a dataframe, given which columns to average over. - Parameters: - df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. - wd_cols (list(str)): The name of the columns to average across. - remove_all_nulls: (bool): Remove all null values in wd_cols (rather than any) + Args: + df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. + wd_cols (list(str)): The name of the columns to average across. 
+ remove_all_nulls: (bool): Remove all null values in wd_cols (rather than any) Returns: - pl.DataFrame: A new Polars DataFrame with an additional wd column + pl.DataFrame: A new Polars DataFrame with an additional wd column """ - # Gather up intermediate column names and final column names wd_cols_cos = [c + "_cos" for c in wd_cols] wd_cols_sin = [c + "_sin" for c in wd_cols] @@ -206,27 +199,27 @@ def add_wd_bin( edges: Optional[Union[np.ndarray, list]] = None, remove_all_nulls: bool = False, ): - """ - Add the wd_bin column to a dataframe, given which columns to average over + """Add the wd_bin column to a dataframe. + + Given which columns to average over and the step sizes to use - Parameters: - df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. - wd_cols (list(str)): The name of the columns to average across. - wd_step (float): Step size for binning - wd_min (float): Minimum wind direction - wd_max (float): Maximum wind direction - edges (array-like): The edges of the bins. Values will be placed into the bin - whose left edge is the largest edge less than or equal to - the value, and whose right edge is the smallest edge - greater than the value. Defaults to None, in which case - the edges are generated using ws_step, ws_min, and ws_max. - remove_all_nulls: (bool): Remove all null values in wd_cols (rather than any) + Args: + df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. + wd_cols (list(str)): The name of the columns to average across. + wd_step (float): Step size for binning + wd_min (float): Minimum wind direction + wd_max (float): Maximum wind direction + edges (array-like): The edges of the bins. Values will be placed into the bin + whose left edge is the largest edge less than or equal to + the value, and whose right edge is the smallest edge + greater than the value. Defaults to None, in which case + the edges are generated using ws_step, ws_min, and ws_max. 
+ remove_all_nulls: (bool): Remove all null values in wd_cols (rather than any) Returns: - pl.DataFrame: A new Polars DataFrame with an additional ws_bin column + pl.DataFrame: A new Polars DataFrame with an additional ws_bin column """ - if edges is None: edges = np.arange(wd_min, wd_max + wd_step, wd_step) @@ -250,17 +243,36 @@ def add_power_test( df_: pl.DataFrame, test_cols: List[str], ) -> pl.DataFrame: + """Add the pow_test column to a dataframe, given which columns to average over. + + Args: + df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. + test_cols (list(str)): The name of the columns to average across. + + Returns: + pl.DataFrame: A new Polars DataFrame with an additional pow_test column + """ return df_.with_columns(pow_test=pl.concat_list(test_cols).list.mean()) def add_power_ref(df_: pl.DataFrame, ref_cols: List[str]): + """Add the pow_ref column to a dataframe, given which columns to average over. + + Args: + df_ (pl.DataFrame): The Polars DataFrame containing the column to bin. + ref_cols (list(str)): The name of the columns to average across. + + Returns: + pl.DataFrame: A new Polars DataFrame with an additional pow_ref column + """ return df_.with_columns(pow_ref=pl.concat_list(ref_cols).list.mean()) def add_reflected_rows(df_: pl.DataFrame, edges: Union[np.ndarray, list], overlap_distance: float): - """ - Adds rows to a datrame with where the wind direction is - reflected around the neearest edge if within overlap_distance + """Add reflected rows to a dataframe. 
+
+    Adds rows to a dataframe where the wind direction is
+    reflected around the nearest edge if within overlap_distance
 
     Given a wind direction DataFrame `df_`, this function
     adds reflected rows to the DataFrame such that each wind direction
@@ -274,23 +286,20 @@
     This function enables overlapping bins in the energy ratio functions
 
-    Parameters
-    ----------
-    df_ : polars.DataFrame
-        The DataFrame to add reflected rows to.
-    edges : numpy.ndarray
-        An array of wind direction edges to use for reflection.
-        (Should be same as used in energy ratio)
-    overlap_distance : float
-        The maximum distance between a wind direction and an edge
-        for the wind direction to be considered overlapping.
-
-    Returns
-    -------
-    polars.DataFrame
-        A new DataFrame with the original rows and the added reflected rows.
-    """
+    Args:
+        df_ : polars.DataFrame
+            The DataFrame to add reflected rows to.
+        edges : numpy.ndarray
+            An array of wind direction edges to use for reflection.
+            (Should be same as used in energy ratio)
+        overlap_distance : float
+            The maximum distance between a wind direction and an edge
+            for the wind direction to be considered overlapping.
 
+    Returns:
+        polars.DataFrame
+        A new DataFrame with the original rows and the added reflected rows.
+    """
     df_add = df_.clone()
     wd = df_add["wd"].to_numpy()
     diff_matrix = wd[:, None] - edges
@@ -314,7 +323,8 @@ def filter_all_nulls(
     ws_cols: List[str],
     wd_cols: List[str],
 ):
-    """
+    """Filter dataframe for ALL nulls.
+
     Filter data by requiring ALL values of ref, test, ws, and
     wd to be valid numbers.
@@ -341,7 +351,8 @@ def filter_any_nulls(
     ws_cols: List[str],
     wd_cols: List[str],
 ):
-    """
+    """Filter dataframe for ANY nulls.
+
     Filter data by requiring ANY of ref, ANY of test, ANY of
     ws, and ANY of wd to be a valid number.
@@ -390,11 +401,40 @@ def check_compute_energy_ratio_inputs( percentiles, remove_all_nulls, ): - """ + """Check the inputs to compute_energy_ratio. + Check inputs to compute_energy_ratio. Inputs reflect inputs to compute_energy_ratio, with exception of df_, which is passed directly instead of er_in. - """ + All the inputs of compute_energy_ratio are checked for validity. This function does not + check every input, although they are all accepted. + + Args: + df_ (pl.DataFrame): The Polars DataFrame + ref_turbines (list): A list of the reference turbine columns + test_turbines (list): A list of the test turbine columns + wd_turbines (list): A list of the wind direction columns + ws_turbines (list): A list of the wind speed columns + use_predefined_ref (bool): Whether to use predefined reference turbines + use_predefined_wd (bool): Whether to use predefined wind direction turbines + use_predefined_ws (bool): Whether to use predefined wind speed turbines + wd_step (float): Step size for binning wind direction + wd_min (float): Minimum wind direction + wd_max (float): Maximum wind direction + ws_step (float): Step size for binning wind speed + ws_min (float): Minimum wind speed + ws_max (float): Maximum wind speed + bin_cols_in (list): A list of columns to bin + weight_by (str): A string indicating how to weight the bins + df_freq (pl.DataFrame): A DataFrame containing frequency data + wd_bin_overlap_radius (float): The radius for overlapping wind direction bins + uplift_pairs (list): A list of uplift pairs + uplift_names (list): A list of uplift names + uplift_absolute (bool): Whether to use absolute uplift + N (int): Number of bootstrapping iterations + percentiles (list): A list of percentiles to calculate from bootstrap + remove_all_nulls (bool): Whether to remove all nulls + """ # Check that the inputs are valid # If use_predefined_ref is True, df_ must have a column named 'pow_ref' if use_predefined_ref: @@ -477,8 +517,7 @@ def bin_and_group_dataframe( 
bin_cols_without_df_name: List = None, num_df: int = 0, ): - """ - Bin and aggregate a DataFrame based on wind direction and wind speed parameters. + """Bin and aggregate a DataFrame based on wind direction and wind speed parameters. This function takes a Polars DataFrame (df_) and performs binning and aggregation operations based on @@ -511,7 +550,6 @@ def bin_and_group_dataframe( Returns: DataFrame: The resulting Polars DataFrame with aggregated statistics. """ - # If wd_bin_overlap_radius is not zero, add reflected rows if wd_bin_overlap_radius > 0.0: # Need to obtain the wd column now rather than during binning @@ -551,7 +589,8 @@ def add_bin_weights( bin_cols_without_df_name: List = None, weight_by: str = "min", ): - """ + """Add weights to DataFrame bins. + Add weights to DataFrame bins based on either frequency counts or the provided frequency table df_freq_pl. diff --git a/flasc/utilities/floris_tools.py b/flasc/utilities/floris_tools.py index 4ce4eeaf..8865ed58 100644 --- a/flasc/utilities/floris_tools.py +++ b/flasc/utilities/floris_tools.py @@ -1,3 +1,5 @@ +"""Utility functions that use FlorisModels.""" + import copy from time import perf_counter as timerpc @@ -31,7 +33,9 @@ def interpolate_floris_from_df_approx( mirror_nans=True, verbose=True, ): - """This function generates the FLORIS predictions for a set of historical + """Interpolate FLORIS predictions from a precalculated table of solutions. + + This function generates the FLORIS predictions for a set of historical data, 'df', quickly by linearly interpolating from a precalculated set of FLORIS solutions, 'df_approx'. We use linear interpolation to eliminate dependency of the computation time on the size of the dataframe/number of @@ -120,7 +124,6 @@ def interpolate_floris_from_df_approx( 52103 2018-12-31 23:30:00 15.6 11.0 0.08 4235128.7 3825108.4 ... 2725108.3 52104 2018-12-31 23:40:00 15.3 11.1 0.08 3860281.3 3987634.7 ... 
2957021.7 """ - # Format dataframe and get number of turbines # df = df.reset_index(drop=('time' in df.columns)) nturbs = fsut.get_num_turbines(df_approx) @@ -327,7 +330,9 @@ def calc_floris_approx_table( ti_array=np.arange(0.03, 0.1801, 0.03), save_turbine_inflow_conditions_to_df=False, ): - """This function calculates a large number of floris solutions for a rectangular grid + """Calculate the FLORIS approximate table from a FlorisModel object. + + This function calculates a large number of floris solutions for a rectangular grid of wind directions ('wd_array'), wind speeds ('ws_array'), and optionally turbulence intensities ('ti_array'). The variables that are saved are each turbine's power production, and optionally also each turbine's inflow wind direction, wind speed and @@ -346,7 +351,7 @@ def calc_floris_approx_table( dataframe size but can provide useful information. Defaults to False. Returns: - df_approx (pd.DataFrame): A Pandas DataFrame containing the floris simulation results for all wind + pd.DataFrame: A Pandas DataFrame containing the floris simulation results for all wind direction, wind speed and turbulence intensity combinations. The outputs are the power production for each turbine, 'pow_000' until 'pow_{nturbs-1}', and optionally als each turbine's inflow wind direction, wind speed and turbulence intensity when save_turbine_inflow_conditions_to_df==True. @@ -357,8 +362,7 @@ def calc_floris_approx_table( ti_array=np.arange(0.03, 0.1801, 0.03) save_turbine_inflow_conditions_to_df=True - Yields: - + Yields: df_approx= wd ws ti pow_000 ws_000 wd_000 ti_000 pow_001 ... pow_006 ws_006 wd_006 ti_006 0 0.0 1.0 0.03 0.0 1.0 0.0 0.03 0.0 ... 0.0 1.0 0.0 0.03 @@ -374,7 +378,6 @@ def calc_floris_approx_table( 32399 357.0 25.0 0.18 0.0 24.880829 357.0 0.18 0.0 ... 0.0 24.881165 357.0 0.18 32400 360.0 25.0 0.18 0.0 24.880829 360.0 0.18 0.0 ... 
0.0 24.881165 360.0 0.18 """ - # if ti_array is None, use the current value in the FLORIS object if ti_array is None: ti = fm.core.flow_field.turbulence_intensity @@ -430,7 +433,9 @@ def calc_floris_approx_table( def add_gaussian_blending_to_floris_approx_table(df_fi_approx, wd_std=3.0, pdf_cutoff=0.995): - """This function applies a Gaussian blending across the wind direction for the predicted + """Add Gaussian blending to the precalculated FLORIS solutions. + + This function applies a Gaussian blending across the wind direction for the predicted turbine power productions from FLORIS. This is a post-processing step and achieves the same result as evaluating FLORIS directly with the UncertainFlorisModel module. However, having this as a postprocess step allows for rapid generation of the FLORIS solutions for @@ -446,7 +451,7 @@ def add_gaussian_blending_to_floris_approx_table(df_fi_approx, wd_std=3.0, pdf_c deviations to the left and to the right of the evaluation. Returns: - df_fi_approx_gauss (pd.DataFrame): Pandas DataFrame with Gaussian-blurred precalculated + pd.DataFrame: Pandas DataFrame with Gaussian-blurred precalculated FLORIS solutions. The DataFrame typically has the columns "wd", "ws", "ti", and "pow_000" until "pow_{nturbs-1}", with nturbs being the number of turbines. @@ -497,27 +502,31 @@ def add_gaussian_blending_to_floris_approx_table(df_fi_approx, wd_std=3.0, pdf_c return df_fi_approx_gauss +# TODO Is this function in the right module? +# TODO Should include itself have a default? def get_turbs_in_radius( x_turbs, y_turbs, turb_no, max_radius, include_itself, sort_by_distance=False ): - """Determine which turbines are within a certain radius of other + """Find turbines within a certain radius of a turbine. + + Determine which turbines are within a certain radius of other wind turbines. Args: x_turbs ([list, array]): Long. locations of turbines y_turbs ([list, array]): Lat. 
locations of turbines - turb_no ([int]): Turbine number for which the distance is + turb_no (int): Turbine number for which the distance is calculated w.r.t. the other turbines. - max_radius ([float]): Maximum distance between turbines to be + max_radius (float): Maximum distance between turbines to be considered within the radius of turbine [turb_no]. - include_itself ([type]): Include itself in the list of turbines + include_itself (bool): Include itself in the list of turbines within the radius. sort_by_distance (bool, optional): Sort the output list of turbines according to distance to the turbine, from closest to furthest (but still within radius). Defaults to False. Returns: - turbs_within_radius ([list]): List of turbines that are within the + list: List of turbines that are within the prespecified distance from turbine [turb_no]. """ dr_turb = np.sqrt((x_turbs - x_turbs[turb_no]) ** 2.0 + (y_turbs - y_turbs[turb_no]) ** 2.0) @@ -538,25 +547,24 @@ def get_turbs_in_radius( def get_all_impacting_turbines_geometrical( fm, turbine_weights, wd_array=np.arange(0.0, 360.0, 3.0), wake_slope=0.30 ): - """Determine which turbines affect the turbines of interest + """Get all impacting turbines using a simple geometric model. + + Determine which turbines affect the turbines of interest (i.e., those with a turbine_weights > 0.00001). This function uses very simplified geometric functions to very quickly derive which turbines are supposedly waking at least one turbine in the farm of interest. Args: - fm ([floris object]): FLORIS object of the farm of interest. - turbine_weights [list]: List of with turbine weights with length - equal to the number of wind turbines, and typically filled with - 0s (neighbouring farms) and 1s (farm of interest). - wd_step (float, optional): Wind direction discretization step. - Defaults to 3.0. + fm (FlorisModel): FLORIS object of the farm of interest. 
+ turbine_weights (list): List of with turbine weights with length + equal to the number of wind turbines, and typically filled with + 0s (neighboring farms) and 1s (farm of interest). + wd_array (array, optional): Array of wind directions to evaluate in [deg]. wake_slope (float, optional): linear slope of the wake (dy/dx) - plot_lines (bool, optional): Enable plotting wakes/turbines. - Defaults to False. Returns: - df_impacting_simple ([pd.DataFrame]): A Pandas Dataframe in which each row + pd.Dataframe: A Pandas Dataframe in which each row contains a wind direction and a list of turbine numbers. The turbine numbers are those turbines that should be modelled to accurately capture the wake losses for the wind farm of interest. Turbine numbers @@ -564,7 +572,6 @@ def get_all_impacting_turbines_geometrical( the simulation without affecting any of the turbines that have a nonzero turbine weight. """ - # Get farm layout x = fm.layout_x y = fm.layout_y @@ -639,14 +646,16 @@ def is_in_wake(xt, yt): def get_upstream_turbs_floris(fm, wd_step=0.1, wake_slope=0.10, plot_lines=False): - """Determine which turbines are operating in freestream (unwaked) + """Use FLORIS to determine which turbines are operating in freestream flow. + + Determine which turbines are operating in freestream (unwaked) flow, for the entire wind rose. This function will return a data- frame where each row will present a wind direction range and a set of wind turbine numbers for which those turbines are operating upstream. This is useful in determining the freestream conditions. Args: - fi ([floris object]): FLORIS object of the farm of interest. + fm ([FlorisModel): FLORIS object of the farm of interest. wd_step (float, optional): Wind direction discretization step. It will test what the upstream turbines are every [wd_step] degrees. A lower number means more accurate results, but @@ -657,7 +666,7 @@ def get_upstream_turbs_floris(fm, wd_step=0.1, wake_slope=0.10, plot_lines=False Defaults to False. 
     Returns:
-        df_upstream ([pd.DataFrame]): A Pandas Dataframe in which each row
+        df_upstream (pd.DataFrame): A Pandas Dataframe in which each row
             contains a wind direction range and a list of turbine numbers. For
             that particular wind direction range, the turbines numbered are
             all upstream according to the FLORIS predictions. Depending on
@@ -668,7 +677,6 @@ def get_upstream_turbs_floris(fm, wd_step=0.1, wake_slope=0.10, plot_lines=False
         turbines are waked/unwaked and has served useful for determining
         what turbines to use as reference.
     """
-
     # Get farm layout
     x = fm.layout_x
     y = fm.layout_y
@@ -787,14 +795,15 @@ def get_dependent_turbines_by_wd(
     ws_test=9.0,
     return_influence_magnitudes=False,
 ):
-    """
+    """Get dependent turbines for a test turbine for each wind direction.
+
     Computes all turbines that depend on the operation of a specified
     turbine (test_turbine) for each wind direction in wd_array, using
     the FLORIS model specified by fm_in to detect dependencies.
 
     Args:
-        fi ([floris object]): FLORIS object of the farm of interest.
-        test_turbine ([int]): Turbine for which dependencies are found.
+        fm_in (FlorisModel): FLORIS object of the farm of interest.
+        test_turbine (int): Turbine for which dependencies are found.
         wd_array ([np.array]): Wind directions at which to determine
             dependencies. Defaults to [0, 2, ... , 358].
         change_threshold (float): Fractional change in power needed
@@ -810,16 +819,17 @@
         test_turbine on all turbines.
 
     Returns:
-        dep_indices_by_wd (list): A 2-dimensional list. Each element of
-            the outer level list, which represents wind direction,
-            contains a list of the turbines that depend on test_turbine
-            for that wind direction. The second-level list may be empty
-            if no turbine depends on the test_turbine for that wind
-            direciton (e.g., the turbine is in the back row).
-        all_influence_magnitudes ([np.array]): 2-D numpy array of
-            influences of test_turbine on all other turbines, with size
-            (number of wind directions) x (number of turbines). Returned
-            only if return_influence_magnitudes is True.
+        A tuple containing:
+            dep_indices_by_wd (list): A 2-dimensional list. Each element of
+                the outer level list, which represents wind direction,
+                contains a list of the turbines that depend on test_turbine
+                for that wind direction. The second-level list may be empty
+                if no turbine depends on the test_turbine for that wind
+                direction (e.g., the turbine is in the back row).
+            all_influence_magnitudes ([np.array]): 2-D numpy array of
+                influences of test_turbine on all other turbines, with size
+                (number of wind directions) x (number of turbines). Returned
+                only if return_influence_magnitudes is True.
     """
     # Copy fi to a local to not mess with incoming
     fm = copy.deepcopy(fm_in)
@@ -880,13 +890,14 @@ def get_all_dependent_turbines(
     limit_number=None,
     ws_test=9.0,
 ):
-    """
+    """Get all dependent turbines for each turbine in the farm.
+
     Wrapper for get_dependent_turbines_by_wd() that loops over all
     turbines in the farm and packages their dependencies as a
     pandas dataframe.
 
     Args:
-        fi ([floris object]): FLORIS object of the farm of interest.
+        fm_in (FlorisModel): FLORIS object of the farm of interest.
         wd_array ([np.array]): Wind directions at which to determine
             dependencies. Defaults to [0, 2, ... , 358].
         change_threshold (float): Fractional change in power needed
@@ -899,7 +910,7 @@ def get_all_dependent_turbines(
         determine dependencies. Defaults to 9. m/s.
 
     Returns:
-        df_out ([pd.DataFrame]): A Pandas Dataframe in which each row
+        df_out (pd.DataFrame): A Pandas Dataframe in which each row
             contains a wind direction, each column is a turbine, and
             each entry is the turbines that depend on the column
             turbine at the row wind direction.
             Dependencies can be extracted
@@ -907,7 +918,6 @@ def get_all_dependent_turbines(
             turbine T are df_out.loc[wd, T]. Dependencies are ordered,
             with strongest dependencies appearing first.
     """
-
     results = []
     for t_i in range(len(fm_in.layout_x)):
         results.append(
@@ -934,14 +944,15 @@ def get_all_impacting_turbines(
     limit_number=None,
     ws_test=9.0,
 ):
-    """
+    """Get all impacting turbines for each turbine in the farm.
+
     Calculate which turbines impact a specified turbine based on the
     FLORIS model. Essentially a wrapper for
     get_dependent_turbines_by_wd() that loops over all turbines
     and extracts their impact magnitudes, then sorts.
 
     Args:
-        fi ([floris object]): FLORIS object of the farm of interest.
+        fm_in (FlorisModel): FLORIS object of the farm of interest.
         wd_array ([np.array]): Wind directions at which to determine
             dependencies. Defaults to [0, 2, ... , 358].
         change_threshold (float): Fractional change in power needed
@@ -954,7 +965,7 @@ def get_all_impacting_turbines(
         determine dependencies. Defaults to 9. m/s.
 
     Returns:
-        df_out ([pd.DataFrame]): A Pandas Dataframe in which each row
+        df_out (pd.DataFrame): A Pandas Dataframe in which each row
             contains a wind direction, each column is a turbine, and
             each entry is the turbines that the column turbine depends
             on at the row wind direction. Dependencies can be extracted
@@ -962,7 +973,6 @@ def get_all_impacting_turbines(
         T are df_out.loc[wd, T]. Impacting turbines are simply
         ordered by magnitude of impact.
""" - dependency_magnitudes = np.zeros((len(wd_array), len(fm_in.layout_x), len(fm_in.layout_x))) for t_i in range(len(fm_in.layout_x)): diff --git a/flasc/utilities/lookup_table_tools.py b/flasc/utilities/lookup_table_tools.py index aa5c97c7..aaf22815 100644 --- a/flasc/utilities/lookup_table_tools.py +++ b/flasc/utilities/lookup_table_tools.py @@ -1,3 +1,5 @@ +"""Module for creating interpolants from lookup tables.""" + import numpy as np from scipy.interpolate import LinearNDInterpolator @@ -5,7 +7,9 @@ def get_yaw_angles_interpolant( df_opt, ramp_up_ws=[4, 5], ramp_down_ws=[10, 12], minimum_yaw_angle=None, maximum_yaw_angle=None ): - """Create an interpolant for the optimal yaw angles from a dataframe + """Get an interpolant for the optimal yaw angles from a dataframe. + + Create an interpolant for the optimal yaw angles from a dataframe 'df_opt', which contains the rows 'wind_direction', 'wind_speed', 'turbulence_intensity', and 'yaw_angles_opt'. This dataframe is typically produced automatically from a FLORIS yaw optimization using Serial Refine @@ -14,26 +18,31 @@ def get_yaw_angles_interpolant( Args: df_opt (pd.DataFrame): Dataframe containing the rows 'wind_direction', - 'wind_speed', 'turbulence_intensity', and 'yaw_angles_opt'. + 'wind_speed', 'turbulence_intensity', and 'yaw_angles_opt'. ramp_up_ws (list, optional): List with length 2 depicting the wind - speeds at which the ramp starts and ends, respectively, on the lower - end. This variable defaults to [4, 5], meaning that the yaw offsets are - zero at and below 4 m/s, then linearly transition to their full offsets - at 5 m/s, and continue to be their full offsets past 5 m/s. Defaults to - [4, 5]. + speeds at which the ramp starts and ends, respectively, on the lower + end. This variable defaults to [4, 5], meaning that the yaw offsets are + zero at and below 4 m/s, then linearly transition to their full offsets + at 5 m/s, and continue to be their full offsets past 5 m/s. Defaults to + [4, 5]. 
ramp_down_ws (list, optional): List with length 2 depicting the wind - speeds at which the ramp starts and ends, respectively, on the higher - end. This variable defaults to [10, 12], meaning that the yaw offsets are - full at and below 10 m/s, then linearly transition to zero offsets - at 12 m/s, and continue to be zero past 12 m/s. Defaults to [10, 12]. + speeds at which the ramp starts and ends, respectively, on the higher + end. This variable defaults to [10, 12], meaning that the yaw offsets are + full at and below 10 m/s, then linearly transition to zero offsets + at 12 m/s, and continue to be zero past 12 m/s. Defaults to [10, 12]. + minimum_yaw_angle (float, optional): The minimum yaw angle in degrees. + Defaults to None. If None, the minimum yaw angle is set to the minimum + yaw angle in the dataset. + maximum_yaw_angle (float, optional): The maximum yaw angle in degrees. + Defaults to None. If None, the maximum yaw angle is set to the maximum + yaw angle in the dataset. Returns: LinearNDInterpolator: An interpolant function which takes the inputs - (wind_directions, wind_speeds, turbulence_intensities), all of equal - dimensions, and returns the yaw angles for all turbines. This function - incorporates the ramp-up and ramp-down regions. + (wind_directions, wind_speeds, turbulence_intensities), all of equal + dimensions, and returns the yaw angles for all turbines. This function + incorporates the ramp-up and ramp-down regions. 
""" - # Load data and set up a linear interpolant points = df_opt[["wind_direction", "wind_speed", "turbulence_intensity"]] values = np.vstack(df_opt["yaw_angles_opt"]) diff --git a/flasc/utilities/optimization.py b/flasc/utilities/optimization.py index b2cffd17..61b8d48b 100644 --- a/flasc/utilities/optimization.py +++ b/flasc/utilities/optimization.py @@ -1,3 +1,5 @@ +"""Module for optimization functions.""" + import copy from datetime import timedelta as td @@ -28,6 +30,47 @@ def find_timeshift_between_dfs( opt_Ns=None, verbose=True, ): + """Find the optimal time shift between two dataframes. + + This function is used to find the optimal time shift between two dataframes + 'df1' and 'df2'. The function will resample the dataframes to a common time + vector and then compare the dataframes in time steps of 't_step'. The function + will then find the optimal time shift between the two dataframes by minimizing + the Pearson correlation coefficient between the two dataframes. The function + can also correct for a bias in the y-values of the dataframes by minimizing + the Pearson correlation coefficient between the two dataframes after a bias + has been removed. + + Args: + df1 (pd.DataFrame): Dataframe 1. + df2 (pd.DataFrame): Dataframe 2. + cols_df1 (list): Columns to use in dataframe 1. + cols_df2 (list): Columns to use in dataframe 2. + use_circular_statistics (bool): Use circular statistics for averaging. + Default is False. + t_step (np.timedelta64): Time step for comparison. + Default is 30 days. + correct_y_shift (bool): Correct for a bias in the y-values. + Default is False. + y_shift_range (np.array): Range of y-shifts to evaluate. + Default is np.arange(-180.0, 180.0, 2.0). + opt_bounds (list): Bounds for optimization. + Default is None. + opt_Ns (int): Number of steps for optimization. + Default is None. + verbose (bool): Print verbose output. + Default is True. 
+ + Returns: + list: List of dictionaries with the following keys: + - t0: Start time of comparison. + - t1: End time of comparison. + - x_opt: Optimal time shift. + - J_opt: Optimal cost function value. + - x: Time shifts evaluated. + - J: Cost function values evaluated. + + """ if np.any(df1["time"].diff() < td(seconds=0)): raise DataError("Dataframe 1 not sorted by time.") @@ -210,6 +253,26 @@ def cost_fun(x_shift): def match_y_curves_by_offset(yref, ytest, dy_eval=None, angle_wrapping=True): + """Match two curves by finding the optimal offset. + + This function is used to match two curves by finding the optimal offset + between the two curves. The function will minimize the mean squared error + between the two curves for a range of offsets. + + Args: + yref (np.array): Reference curve. + ytest (np.array): Test curve. + dy_eval (np.array): Range of offsets to evaluate. + Default is None. + angle_wrapping (bool): Use angle wrapping for evaluation. + Default is True. + + Returns: + Tuple (float, float) with the following elements: + dwd_opt: Optimal offset. + J_opt: Optimal cost function value. + + """ if dy_eval is None: if angle_wrapping: dy_eval = np.arange(-180.0, 180.0, 2.0) @@ -242,7 +305,7 @@ def match_y_curves_by_offset(yref, ytest, dy_eval=None, angle_wrapping=True): def estimate_ti( - fi, + fm, P_measured, Ns, bounds, @@ -251,8 +314,34 @@ def estimate_ti( refine_with_fmin=False, verbose=False, ): + """Estimate the turbulence intensity for a given turbine. + + This function is used to estimate the turbulence intensity for a given + turbine. + + Args: + fm (FlorisModel): FlorisModel object. + P_measured (np.array): Measured power data. + Ns (int): Number of steps for optimization. + bounds (list): Bounds for optimization. + turbine_upstream (int): Upstream turbine. + turbines_downstream (list): Downstream turbines. + refine_with_fmin (bool): Refine with fmin. + Default is False. + verbose (bool): Print verbose output. + Default is False. 
+
+    Returns:
+        dict: Dictionary with the following
+            keys:
+            - x_opt: Optimal turbulence intensity estimate.
+            - J_opt: Optimal cost function value.
+            - x: Turbulence intensities evaluated.
+            - J: Cost function values evaluated.
+
+    """
     # Make copy so that existing object is not changed
-    fm = copy.deepcopy(fi)
+    fm = copy.deepcopy(fm)
     num_turbines = len(fm.layout_x)
     ti_0 = np.mean(fm.core.farm.turbulence_intensity)
@@ -260,7 +349,7 @@ def cost_fun(ti):
         ti_array = np.repeat(ti_0, num_turbines)
         ti_array[turbine_upstream] = ti
-        ftools._fi_set_ws_wd_ti(fi, ti=ti_array)
+        ftools._fi_set_ws_wd_ti(fm, ti=ti_array)
         fm.run()
         Pturbs = np.array(fm.get_turbine_power())
         Pturbs = Pturbs[turbines_downstream]
diff --git a/flasc/utilities/tuner_utilities.py b/flasc/utilities/tuner_utilities.py
index 9644527f..88d82679 100644
--- a/flasc/utilities/tuner_utilities.py
+++ b/flasc/utilities/tuner_utilities.py
@@ -1,3 +1,12 @@
+"""Utilities for tuning FLORIS parameters.
+
+This module contains utilities for tuning FLORIS parameters. This includes
+functions for resimulating FLORIS with SCADA data, and functions for setting
+parameters in a FLORIS model.
+
+"""
+
+
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -8,27 +17,24 @@
 from flasc.data_processing import dataframe_manipulations as dfm
 from flasc.utilities.utilities_examples import load_floris_smarteole
 
-# from floris import ParallelComputingInterface
-
 
 def replicate_nan_values(df_1: pd.DataFrame, df_2: pd.DataFrame):
-    """
-    Replicate NaN Values in DataFrame df_2 to Match DataFrame df_1.
+    """Replicate NaN Values in DataFrame df_2 to Match DataFrame df_1.
 
     For columns that are common between df_1 and df_2, this function ensures
     that NaN values in df_2 appear in the same locations as NaN values in df_1.
 
     This is primarily useful when df_2 represents a FLORIS resimulation of df_1,
     and you want to ensure that missing data is consistent between the two DataFrames.
- Parameters: - - df_1 (pandas.DataFrame): The reference DataFrame containing NaN values. - - df_2 (pandas.DataFrame): The DataFrame to be updated to match NaN positions in df_1. + Args: + df_1 (pandas.DataFrame): The reference DataFrame containing NaN values. + df_2 (pandas.DataFrame): The DataFrame to be updated to match NaN positions in df_1. Returns: - - pandas.DataFrame: A new DataFrame with NaN values in df_2 replaced to match df_1. + pandas.DataFrame: A new DataFrame with NaN values in df_2 replaced to match df_1. """ # For columns which df_1 and df_2 have in common, make sure - # occurences of NaNs which appear in df_1 + # occurrences of NaNs which appear in df_1 # appear in the same location in df_2 # This function is primarily for the case where df_2 is # a FLORIS resimulation of df_1 and making sure @@ -47,6 +53,7 @@ def replicate_nan_values(df_1: pd.DataFrame, df_2: pd.DataFrame): def nested_get(dic: Dict[str, Any], keys: List[str]) -> Any: """Get a value from a nested dictionary using a list of keys. + Based on: stackoverflow.com/questions/14692690/access-nested-dictionary-items-via-a-list-of-keys Args: @@ -63,6 +70,7 @@ def nested_get(dic: Dict[str, Any], keys: List[str]) -> Any: def nested_set(dic: Dict[str, Any], keys: List[str], value: Any, idx: Optional[int] = None) -> None: """Set a value in a nested dictionary using a list of keys. + Based on: stackoverflow.com/questions/14692690/access-nested-dictionary-items-via-a-list-of-keys Args: @@ -89,13 +97,28 @@ def nested_set(dic: Dict[str, Any], keys: List[str], value: Any, idx: Optional[i def resim_floris(fm_in: FlorisModel, df_scada: pd.DataFrame, yaw_angles: np.array = None): + """Resimulate FLORIS with SCADA data. + + This function takes a FlorisModel and a SCADA dataframe, and resimulates the + FlorisModel with the SCADA data. The SCADA data is expected to contain columns + for wind speed, wind direction, and power reference. 
The function returns a + dataframe containing the power output of each turbine in the FlorisModel. + + Args: + fm_in (FlorisModel): The FlorisModel to resimulate. + df_scada (pd.DataFrame): The SCADA data to use for resimulation. + yaw_angles (np.array, optional): The yaw angles to use for resimulation. Defaults to None. + + Returns: + pd.DataFrame: A DataFrame containing the power output of each turbine in the FlorisModel. + """ # Get wind speeds and directions wind_speeds = df_scada["ws"].values wind_directions = df_scada["wd"].values # TODO: better handling of TIs? turbulence_intensities = fm_in.turbulence_intensities[0] * np.ones_like(wind_speeds) - # Get the number of turbiens + # Get the number of turbines num_turbines = dfm.get_num_turbines(df_scada) # Set up the FLORIS model diff --git a/flasc/utilities/utilities.py b/flasc/utilities/utilities.py index de630e40..d874c63f 100644 --- a/flasc/utilities/utilities.py +++ b/flasc/utilities/utilities.py @@ -1,22 +1,20 @@ +"""Utility functions for the FLASC module.""" + import datetime -# import numba import numpy as np - -# import scipy.interpolate as interp from floris.utilities import wrap_360 def estimate_dt(time_array): - """Automatically estimate timestep in a time_array + """Automatically estimate timestep in a time_array. Args: - time_array ([list]): List or dataframe with time entries + time_array (list): List or dataframe with time entries Returns: - dt ([datetime.timedelta]): Timestep in dt.timedelta format + datetime.timedelta: Timestep in dt.timedelta format """ - if len(time_array) < 2: # Assume arbitrary value return datetime.timedelta(seconds=0) @@ -33,6 +31,14 @@ def estimate_dt(time_array): def get_num_turbines(df): + """Get the number of turbines in a dataframe. 
+ + Args: + df (pd.DataFrame): Dataframe with turbine data + + Returns: + int: Number of turbines in the dataframe + """ nt = 0 while ("pow_%03d" % nt) in df.columns: nt += 1 @@ -40,6 +46,26 @@ def get_num_turbines(df): def interp_with_max_gap(x, xp, fp, max_gap, kind, wrap_around_360=False): + """Interpolate data linearly or using nearest-neighbor with maximum gap. + + If there is larger gap in data than `max_gap`, the gap will be filled + with np.nan. + + Args: + x (np.array): The output x-data; the data points in x-axis that + you want the interpolation results from. + xp (np.array): The input x-data. + fp (np.array): The input y-data. + max_gap (float): The maximum allowable distance between x and `xp` for which + interpolation is still performed. Gaps larger than + this will be filled with np.nan in the output `target_y`. + kind (str): The interpolation method to use. Can be 'linear' or 'nearest'. + wrap_around_360 (bool): If True, the interpolation will be done in a circular + fashion, i.e., the interpolation will wrap around 360 degrees. + + Returns: + np.array: The interpolation results. + """ if not ((kind == "linear") or (kind == "nearest")): raise NotImplementedError("Unknown interpolation method specified.") @@ -71,38 +97,28 @@ def _interpolate_with_max_gap( kind="linear", extrapolate=True, ): - """ - Interpolate data linearly or using nearest-neighbor with maximum gap. + """Interpolate data linearly or using nearest-neighbor with maximum gap. + If there is larger gap in data than `max_gap`, the gap will be filled with np.nan. The input values should not contain NaNs. - Parameters - --------- - xp: np.array - The input x-data. - fp: np.array - The input y-data. - x: np.array - The output x-data; the data points in x-axis that - you want the interpolation results from. - max_gap: float - The maximum allowable distance between x and `xp` for which - interpolation is still performed. 
Gaps larger than - this will be filled with np.nan in the output `target_y`. - xp_is_sorted: boolean, default: True - If True, the input data `xp` is assumed to be monotonically - increasing. Some performance gain if you supply sorted input data. - x_is_sorted: boolean, default: True - If True, the input data `x` is assumed to be - monotonically increasing. Some performance gain if you supply - sorted input data. - - Returns - ------ - target_y: np.array - The interpolation results. + Args: + x (np.array): The output x-data; the data points in x-axis that + you want the interpolation results from. + xp (np.array): The input x-data. + fp (np.array): The input y-data. + max_gap (float): The maximum allowable distance between x and `xp` for which + interpolation is still performed. Gaps larger than + this will be filled with np.nan in the output `target_y`. + assume_sorted (bool): If True, assume that `xp` is sorted in ascending + order. If False, sort `xp` and `fp` to be monotonous. + kind (str): The interpolation method to use. Can be 'linear' or 'nearest'. + extrapolate (bool): If True, extrapolate the data points on the boundaries + + Returns: + np.array: The interpolation results. """ if not assume_sorted: # Sort xp and fp to be monotonous diff --git a/flasc/utilities/utilities_examples.py b/flasc/utilities/utilities_examples.py index 3b13fc77..35bc0978 100644 --- a/flasc/utilities/utilities_examples.py +++ b/flasc/utilities/utilities_examples.py @@ -1,3 +1,5 @@ +"""Utility functions for examples.""" + import copy from pathlib import Path from time import perf_counter as timerpc @@ -24,7 +26,6 @@ def load_floris_smarteole(wake_model="gch", wd_std=0.0): Returns: FlorisModel: Floris object. 
""" - # Use the local FLORIS GCH/CC model for the wake model settings root_path = ( Path(__file__).resolve().parents[2] / "examples_smarteole" / "floris_input_smarteole" @@ -58,16 +59,14 @@ def load_floris_artificial(wake_model="gch", wd_std=0.0, cosine_exponent=None): wake_model (str, optional): The wake model that FLORIS should use. Common options are 'cc', 'gch', 'jensen', 'turbopark' and 'emgauss' . Defaults to "gch". - operation_modes (array, optional): Array or list of integers denoting each - turbine's operation mode. When None is specified, will assume each turbine - is in its first operation mode (0). Defaults to None. wd_std (float, optional): Uncertainty; standard deviation in the inflow wind direction in degrees. Defaults to 0.0 deg meaning no uncertainty. + cosine_exponent (float, optional): The cosine exponent for the power-thrust + table. Defaults to None. Returns: FlorisModel: Floris object. """ - # Use the local FLORIS GCH/CC model for the wake model settings root_path = ( Path(__file__).resolve().parents[2] / "examples_artificial_data" / "floris_input_artificial" diff --git a/flasc/version.py b/flasc/version.py index cd5ac039..38f77a65 100644 --- a/flasc/version.py +++ b/flasc/version.py @@ -1 +1 @@ -2.0 +2.0.1 diff --git a/flasc/visualization.py b/flasc/visualization.py index a58da77e..dda3395a 100644 --- a/flasc/visualization.py +++ b/flasc/visualization.py @@ -1,3 +1,9 @@ +"""Module for visualization of FLASC data. + +This module contains functions for visualizing data from the FLASC package. + +""" + import numpy as np import pandas as pd import scipy.stats as st @@ -16,7 +22,9 @@ def plot_with_wrapping( label=None, rasterized=False, ): - """Plot a line on an axis that deals with angle wrapping. Normally, using + """Plot a line on an axis that deals with angle wrapping. + + Normally, using pyplot will blindly connects points that wrap around 360 degrees, e.g., going from 357 degrees to 2 degrees. 
This will create a strong vertical line through the plot that connects the two points, while actually these @@ -27,41 +35,41 @@ def plot_with_wrapping( Args: x (iteratible): NumPy array or list containing indices/time stamps of - the data. + the data. y (iteratible): NumPy array containing direction/angle data that - requires 360 deg angle wrapping. These are typically wind directions - or nacelle headings. + requires 360 deg angle wrapping. These are typically wind directions + or nacelle headings. ax (plt.Axis, optional): Axis object of the matplotlib.pyplot class. - The line will be plotted on this axis. If None specified, will create - a figure and axis, and plot the line in there. Defaults to None. + The line will be plotted on this axis. If None specified, will create + a figure and axis, and plot the line in there. Defaults to None. low (float, optional): Lower limit at which the angles should be - wrapped. When using degrees, this should be typically 0.0 deg for wind - directions and nacelle directions (i.e., compass directions). When using - vane signals, this is typically -180.0 deg. When using radians, - this should be the equivalent in radians (e.g., 0 or - np.pi). - Defaults to 0.0. + wrapped. When using degrees, this should be typically 0.0 deg for wind + directions and nacelle directions (i.e., compass directions). When using + vane signals, this is typically -180.0 deg. When using radians, + this should be the equivalent in radians (e.g., 0 or - np.pi). + Defaults to 0.0. high (float, optional): Upper limit at which the angles should be - wrapped. When using degrees, this should be 360.0 deg for wind - directions and nacelle directions (i.e., compass directions). - When using radians, this should be the equivalent in radians. - Defaults to 360.0. + wrapped. When using degrees, this should be 360.0 deg for wind + directions and nacelle directions (i.e., compass directions). + When using radians, this should be the equivalent in radians. 
+ Defaults to 360.0. linestyle (str, optional): Linestyle for the plot. Defaults to "-". marker (str, optional): Marker style for the plot. If None is - specified, will not use markers. Defaults to None. + specified, will not use markers. Defaults to None. color (str, optional): Color of the line and markers. Defaults to - "black". + "black". label (string, optional): Label for the line and markers. If None is - specified, will not label the line. Defaults to None. - rasterize (bool, optional): Rasterize the plot/line and thereby remove - its vectorized properties. This can help reduce the size of a .pdf or - .eps file when this figure is saved, at the cost of a potential - reduction in image quality. + specified, will not label the line. Defaults to None. + rasterized (bool, optional): Rasterize the plot/line and thereby remove + its vectorized properties. This can help reduce the size of a .pdf or + .eps file when this figure is saved, at the cost of a potential + reduction in image quality. + Returns: ax: Axis object of the matplotlib.pyplot class on which the line (and - optionally markers) are plotted. + optionally markers) are plotted. """ - # Create figure, if not provided if ax is None: fig, ax = plt.subplots() @@ -139,11 +147,27 @@ def plot_with_wrapping( def generate_default_labels(fm): + """Generate default labels for a FlorisModel. + + Args: + fm (FlorisModel): A FlorisModel instance. + + Returns: + list: A list of labels for the turbines in the FlorisModel. + """ labels = ["T{0:02d}".format(ti) for ti in range(len(fm.layout_x))] return labels def generate_labels_with_hub_heights(fm): + """Generate labels for a FlorisModel with hub heights. + + Args: + fm (FlorisModel): A FlorisModel instance. + + Returns: + list: A list of labels for the turbines in the FlorisModel. 
+ """ labels = [ "T{0:02d} ({1:.1f} m)".format(ti, h) for ti, h in enumerate(fm.core.farm.hub_heights.flatten()) @@ -164,9 +188,9 @@ def plot_binned_mean_and_ci( alpha_scatter=0.1, confidence_level=0.95, ): - """ - Plot data to a single axis. Method - has options to include scatter of underlying data, specifiying + """Plot the mean and confidence interval of y as a function of x. + + Method has options to include scatter of underlying data, specifying bin edges, and plotting confidence interval. Args: @@ -188,11 +212,10 @@ def plot_binned_mean_and_ci( confidence interval. Defaults to True. alpha_scatter (float, optional): Alpha for scatter plot. Defaults to 0.5. - confidenceLevel (float, optional): Confidence level for + confidence_level (float, optional): Confidence level for confidence interval. Defaults to 0.95. """ - # Check the length of x equals length of y if len(x) != len(y): raise ValueError("x and y must be the same length") diff --git a/flasc/yaw_optimizer_visualization.py b/flasc/yaw_optimizer_visualization.py index bccf8128..632829a4 100644 --- a/flasc/yaw_optimizer_visualization.py +++ b/flasc/yaw_optimizer_visualization.py @@ -1,3 +1,6 @@ +"""Module for visualizing yaw optimizer results.""" + + import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -15,6 +18,18 @@ def plot_uplifts_by_atmospheric_conditions( wd_edges=np.arange(0.0, 360.0001, 3.0), ti_edges=np.arange(0.0, 0.30, 0.02), ): + """Plot relative power gains and contributions to AEP uplift by atmospheric conditions. + + This function plots the relative power gains and contributions to AEP uplift + + Args: + df_list (List[pd.DataFrame]): List of dataframes with power gains and contributions + to AEP uplift. + labels (List[str]): List of labels for the dataframes. Defaults to None. + ws_edges (np.array): Wind speed bin edges. Defaults to np.arange(3.0, 17.0, 1.0). + wd_edges (np.array): Wind direction bin edges. Defaults to np.arange(0.0, 360.0001, 3.0). 
+ ti_edges (np.array): Turbulence intensity bin edges. Defaults to np.arange(0.0, 0.30, 0.02). + """ # Calculate bin means ws_labels = (ws_edges[0:-1] + ws_edges[1::]) / 2.0 wd_labels = (wd_edges[0:-1] + wd_edges[1::]) / 2.0 @@ -142,7 +157,8 @@ def _plot_bins(x, y, yn, xlabel=None, ylabel=None, labels=None): def plot_offsets_wswd_heatmap(df_offsets, turb_id, ax=None): - """ + """Plot offsets for a single turbine as a heatmap in wind speed. + df_offsets should be a dataframe with columns: - wind_direction, - wind_speed, @@ -153,8 +169,16 @@ def plot_offsets_wswd_heatmap(df_offsets, turb_id, ax=None): to contain individual turbine offsets in distinct columns (unlike the yaw_angles_opt column from FLORIS. - """ + Args: + df_offsets (pd.DataFrame): dataframe with offsets + turb_id (int or str): turbine id or column name + ax (matplotlib.axes.Axes): axis to plot on. If None, a new figure is created. + Default is None. + + Returns: + A tuple containing a matplotlib.axes.Axes object and a matplotlib.colorbar.Colorbar + """ if isinstance(turb_id, int): if "yaw_angles_opt" in df_offsets.columns: offsets = np.vstack(df_offsets.yaw_angles_opt.to_numpy())[:, turb_id] @@ -197,8 +221,10 @@ def plot_offsets_wswd_heatmap(df_offsets, turb_id, ax=None): return ax, cbar +# TODO: This function feels a little old fashioned def plot_offsets_wd(df_offsets, turb_id, ws_plot, color="black", alpha=1.0, label=None, ax=None): - """ + """Plot offsets for a single turbine as a function of wind direction. + df_offsets should be a dataframe with columns: - wind_direction, - wind_speed, @@ -208,8 +234,17 @@ def plot_offsets_wd(df_offsets, turb_id, ws_plot, color="black", alpha=1.0, labe a two-element tuple or list, that range of wind speeds is plotted. label only allowed is single wind speed is given. 
- """ + Args: + df_offsets (pd.DataFrame): dataframe with offsets + turb_id (int or str): turbine id or column name + ws_plot (float or list): wind speed to plot + color (str): color of line + alpha (float): transparency of line + label (str): label for line + ax (matplotlib.axes.Axes): axis to plot on. If None, a new figure is created. + Default is None. + """ if isinstance(turb_id, int): if "yaw_angles_opt" in df_offsets.columns: offsets = np.vstack(df_offsets.yaw_angles_opt.to_numpy())[:, turb_id] diff --git a/pyproject.toml b/pyproject.toml index 23ba50ef..2ed27f63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,29 +22,10 @@ filterwarnings = [ [tool.ruff] -src = ["flasc", "tests"] -line-length = 100 -target-version = "py310" -extend-include = ["*.ipynb"] -ignore-init-module-imports = true - -# See https://github.com/charliermarsh/ruff#supported-rules -# for rules included and matching to prefix. -select = ["E", "F", "I"] - -# F401 unused-import: Ignore until all used isort flags are adopted in ruff -# ignore = ["F401"] - -# Ignore == -> is as this breaks in pandas -ignore = ["E712"] - -# Allow autofix for all enabled rules (when `--fix`) is provided. -# fixable = ["A", "B", "C", "D", "E", "F"] -fixable = ["E", "F", "I"] -unfixable = [] # Exclude a variety of commonly ignored directories. exclude = [ + "flasc/version.py", ".bzr", ".direnv", ".eggs", @@ -66,17 +47,46 @@ exclude = [ "venv", ] +src = ["flasc", "tests"] +line-length = 100 +target-version = "py310" +extend-include = ["*.ipynb"] + + +[tool.ruff.lint] + # Allow unused variables when underscore-prefixed. dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +# See https://github.com/charliermarsh/ruff#supported-rules +# for rules included and matching to prefix. +select = ["E", "F", "I", "D"] + +# Ignore == -> is as this breaks in pandas +ignore = ["E712"] + +# Allow autofix for all enabled rules (when `--fix`) is provided. 
+# fixable = ["A", "B", "C", "D", "E", "F"] +fixable = ["E", "F", "I", "D"] +unfixable = [] -[tool.ruff.isort] +[tool.ruff.lint.isort] combine-as-imports = true known-first-party = ["flasc"] order-by-type = false # [tool.ruff.format] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] # Ignore `F401` (import violations) in all `__init__.py` files, and in `path/to/file.py`. "__init__.py" = ["F401"] + +# Ignore pydocstyle in directories besides flasc +"flasc/timing_tests/*" = ["D"] +"flasc/version.py" = ["D"] +"tests/*" = ["D"] +"examples_*/*" = ["D"] +"docs/*" = ["D"] + +[tool.ruff.lint.pydocstyle] +convention = "google" diff --git a/setup.py b/setup.py index 91869685..fb7fb27d 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "seaborn~=0.0", "polars==0.19.5", "ephem", - "coloredlogs~=10.0", + "coloredlogs~=15.0", ] EXTRAS = {