Skip to content

Commit

Permalink
Merge pull request #179 from holukas/time-periods-ec-flags
Browse files Browse the repository at this point in the history
Time periods ec flags
  • Loading branch information
holukas authored Aug 26, 2024
2 parents 1cfc3be + 4cac2a3 commit 2b81037
Show file tree
Hide file tree
Showing 8 changed files with 1,994 additions and 1,662 deletions.
24 changes: 23 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,27 @@

![DIIVE](images/logo_diive1_256px.png)

## v0.79.1 | 26 Aug 2024

### Additions

- Added new function to apply quality flags to certain time periods only (`diive.pkgs.qaqc.flags.restrict_application`)
- Added the option to restrict the application of the angle-of-attack flag to certain time periods (
`diive.pkgs.fluxprocessingchain.level2_qualityflags.FluxQualityFlagsEddyPro.angle_of_attack_test`)

### Changes

- Test options in `FluxProcessingChain` are now always passed as a dict. This has the advantage that, in addition to
running the test by setting the dict key `apply` to `True`, various other test settings can be passed, for example the
new parameter `application_dates` for the angle-of-attack flag. (
`diive.pkgs.fluxprocessingchain.fluxprocessingchain.FluxProcessingChain`)

### Tests

- Added unittest for Flux Processing Chain up to Level-2 (
`tests.test_fluxprocessingchain.TestFluxProcessingChain.test_fluxprocessingchain_level2`)
- 36/36 unittests ran successfully

## v0.79.0 | 22 Aug 2024

This version introduces a histogram plot that has the option to display z-score as vertical lines superimposed on the
Expand All @@ -23,7 +44,8 @@ time series. The bin with most counts is highlighted orange.*

### Additions

- Added histogram plots to `FlagBase`, histograms are now shown for all outlier methods (`diive.core.base.flagbase.FlagBase.defaultplot`)
- Added histogram plots to `FlagBase`, histograms are now shown for all outlier methods (
`diive.core.base.flagbase.FlagBase.defaultplot`)
- Added daytime/nighttime histogram plots to (`diive.pkgs.outlierdetection.hampel.HampelDaytimeNighttime`)
- Added daytime/nighttime histogram plots to (`diive.pkgs.outlierdetection.zscore.zScoreDaytimeNighttime`)
- Added daytime/nighttime histogram plots to (`diive.pkgs.outlierdetection.lof.LocalOutlierFactorDaytimeNighttime`)
Expand Down
161 changes: 90 additions & 71 deletions diive/pkgs/fluxprocessingchain/fluxprocessingchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,11 @@ def level2_quality_flag_expansion(
self,
signal_strength: dict or False = False,
raw_data_screening_vm97: dict or False = False,
ssitc: bool = True,
gas_completeness: bool = False,
spectral_correction_factor: bool = True,
angle_of_attack: bool = False,
steadiness_of_horizontal_wind: bool = False
ssitc: dict or False = False,
gas_completeness: dict or False = False,
spectral_correction_factor: dict or False = False,
angle_of_attack: dict or False = False,
steadiness_of_horizontal_wind: dict or False = False
):
"""Expand flux quality flag based on EddyPro output"""
idstr = 'L2'
Expand All @@ -189,21 +189,21 @@ def level2_quality_flag_expansion(
fluxbasevar=self.fluxbasevar)
self._level2.missing_vals_test()

if ssitc:
if ssitc['apply']:
self._level2.ssitc_test()

if gas_completeness:
if gas_completeness['apply']:
self._level2.gas_completeness_test()

if spectral_correction_factor:
if spectral_correction_factor['apply']:
self._level2.spectral_correction_factor_test()

if signal_strength:
if signal_strength['apply']:
self._level2.signal_strength_test(signal_strength_col=signal_strength['signal_strength_col'],
method=signal_strength['method'],
threshold=signal_strength['threshold'])

if raw_data_screening_vm97:
if raw_data_screening_vm97['apply']:
self._level2.raw_data_screening_vm97_tests(spikes=raw_data_screening_vm97['spikes'],
amplitude=raw_data_screening_vm97['amplitude'],
dropout=raw_data_screening_vm97['dropout'],
Expand All @@ -212,10 +212,10 @@ def level2_quality_flag_expansion(
skewkurt_sf=raw_data_screening_vm97['skewkurt_sf'],
discont_hf=raw_data_screening_vm97['discont_hf'],
discont_sf=raw_data_screening_vm97['discont_sf'])
if angle_of_attack:
self._level2.angle_of_attack_test()
if angle_of_attack['apply']:
self._level2.angle_of_attack_test(application_dates=angle_of_attack['application_dates'])

if steadiness_of_horizontal_wind:
if steadiness_of_horizontal_wind['apply']:
self._level2.steadiness_of_horizontal_wind()

def _finalize_level(self,
Expand Down Expand Up @@ -287,7 +287,6 @@ def level31_storage_correction(self, gapfill_storage_term: bool = False):
self._level31.storage_correction()

def finalize_level31(self):

newcols = detect_new_columns(df=self.level31.results, other=self.fpc_df)
self._fpc_df = pd.concat([self.fpc_df, self.level31.results[newcols]], axis=1)
[print(f"++Added new column {col}.") for col in newcols]
Expand Down Expand Up @@ -574,7 +573,7 @@ def example():
# Source data
from pathlib import Path
from diive.core.io.files import load_parquet
SOURCEDIR = r"L:\Sync\luhk_work\20 - CODING\29 - WORKBENCH\cha_fp2024.1_2005-2023\0_data\RESULTS-IRGA-Level-1_fluxnet_2005-2023"
SOURCEDIR = r"L:\Sync\luhk_work\20 - CODING\29 - WORKBENCH\dataset_cha_fp2024_2005-2023\0_data\RESULTS-IRGA-Level-1_fluxnet_2005-2023"
FILENAME = r"CH-CHA_IRGA_Level-1_eddypro_fluxnet_2005-2023_availableVars.parquet"
FILEPATH = Path(SOURCEDIR) / FILENAME
maindf = load_parquet(filepath=FILEPATH)
Expand Down Expand Up @@ -606,16 +605,18 @@ def example():
# --------------------
# Level-2
# --------------------
TEST_SSITC = True # Default True
TEST_GAS_COMPLETENESS = True # Default True
TEST_SPECTRAL_CORRECTION_FACTOR = True # Default True
TEST_SSITC = False # Default True
TEST_GAS_COMPLETENESS = False # Default True
TEST_SPECTRAL_CORRECTION_FACTOR = False # Default True

# Signal strength
TEST_SIGNAL_STRENGTH = False
TEST_SIGNAL_STRENGTH_COL = 'CUSTOM_AGC_MEAN'
TEST_SIGNAL_STRENGTH_METHOD = 'discard above'
TEST_SIGNAL_STRENGTH_THRESHOLD = 90
# TimeSeries(series=maindf[TEST_SIGNAL_STRENGTH_COL]).plot()

TEST_RAWDATA = False # Default True
TEST_RAWDATA_SPIKES = True # Default True
TEST_RAWDATA_AMPLITUDE = True # Default True
TEST_RAWDATA_DROPOUT = True # Default True
Expand All @@ -626,49 +627,67 @@ def example():
TEST_RAWDATA_DISCONT_SF = False # Default False

TEST_RAWDATA_ANGLE_OF_ATTACK = False # Default False
# TEST_RAWDATA_ANGLE_OF_ATTACK_APPLICATION_DATES = [['2023-01-01', '2023-07-01']] # Default False
# TEST_RAWDATA_ANGLE_OF_ATTACK_APPLICATION_DATES = [['2023-07-01', '2023-09-01']] # Default False
TEST_RAWDATA_ANGLE_OF_ATTACK_APPLICATION_DATES = False # Default False

TEST_RAWDATA_STEADINESS_OF_HORIZONTAL_WIND = False # Default False

LEVEL2_SETTINGS = {
'signal_strength': {'signal_strength_col': TEST_SIGNAL_STRENGTH_COL, 'method': TEST_SIGNAL_STRENGTH_METHOD,
'threshold': TEST_SIGNAL_STRENGTH_THRESHOLD},
'raw_data_screening_vm97': {'spikes': TEST_RAWDATA_SPIKES, 'amplitude': TEST_RAWDATA_AMPLITUDE,
'dropout': TEST_RAWDATA_DROPOUT, 'abslim': TEST_RAWDATA_ABSLIM,
'skewkurt_hf': TEST_RAWDATA_SKEWKURT_HF, 'skewkurt_sf': TEST_RAWDATA_SKEWKURT_SF,
'discont_hf': TEST_RAWDATA_DISCONT_HF,
'discont_sf': TEST_RAWDATA_DISCONT_SF},
'ssitc': TEST_SSITC,
'gas_completeness': TEST_GAS_COMPLETENESS,
'spectral_correction_factor': TEST_SPECTRAL_CORRECTION_FACTOR,
'angle_of_attack': TEST_RAWDATA_ANGLE_OF_ATTACK,
'steadiness_of_horizontal_wind': TEST_RAWDATA_STEADINESS_OF_HORIZONTAL_WIND
'signal_strength': {
'apply': TEST_SIGNAL_STRENGTH,
'signal_strength_col': TEST_SIGNAL_STRENGTH_COL,
'method': TEST_SIGNAL_STRENGTH_METHOD,
'threshold': TEST_SIGNAL_STRENGTH_THRESHOLD},
'raw_data_screening_vm97': {
'apply': TEST_RAWDATA,
'spikes': TEST_RAWDATA_SPIKES,
'amplitude': TEST_RAWDATA_AMPLITUDE,
'dropout': TEST_RAWDATA_DROPOUT,
'abslim': TEST_RAWDATA_ABSLIM,
'skewkurt_hf': TEST_RAWDATA_SKEWKURT_HF,
'skewkurt_sf': TEST_RAWDATA_SKEWKURT_SF,
'discont_hf': TEST_RAWDATA_DISCONT_HF,
'discont_sf': TEST_RAWDATA_DISCONT_SF},
'ssitc': {
'apply': TEST_SSITC},
'gas_completeness': {
'apply': TEST_GAS_COMPLETENESS},
'spectral_correction_factor': {
'apply': TEST_SPECTRAL_CORRECTION_FACTOR},
'angle_of_attack': {
'apply': TEST_RAWDATA_ANGLE_OF_ATTACK,
'application_dates': TEST_RAWDATA_ANGLE_OF_ATTACK_APPLICATION_DATES},
'steadiness_of_horizontal_wind': {
'apply': TEST_RAWDATA_STEADINESS_OF_HORIZONTAL_WIND}
}
fpc.level2_quality_flag_expansion(**LEVEL2_SETTINGS)
fpc.finalize_level2(nighttime_threshold=NIGHTTIME_THRESHOLD, daytime_accept_qcf_below=DAYTIME_ACCEPT_QCF_BELOW,
nighttimetime_accept_qcf_below=NIGHTTIMETIME_ACCEPT_QCF_BELOW)
fpc.level2_qcf.showplot_qcf_heatmaps()
fpc.level2_qcf.report_qcf_evolution()
# fpc.level2_qcf.report_qcf_flags()
# fpc.level2.results
# fpc.fpc_df
# fpc.filteredseries
# [x for x in fpc.fpc_df.columns if 'L2' in x]

# --------------------
# Level-3.1
# --------------------
fpc.level31_storage_correction(gapfill_storage_term=False)
fpc.finalize_level31()
# fpc.level31.showplot(maxflux=50)
fpc.level31.report()
# fpc.fpc_df
# fpc.filteredseries
# fpc.level31.results
# [x for x in fpc.fpc_df.columns if 'L3.1' in x]
# # --------------------
# # Level-3.1
# # --------------------
# fpc.level31_storage_correction(gapfill_storage_term=True)
# fpc.finalize_level31()
# # fpc.level31.showplot(maxflux=50)
# fpc.level31.report()
# # fpc.fpc_df
# # fpc.filteredseries
# # fpc.level31.results
# # [x for x in fpc.fpc_df.columns if 'L3.1' in x]

# --------------------
# Level-3.2
# --------------------
fpc.level32_stepwise_outlier_detection()
# fpc.level32_stepwise_outlier_detection()

# fpc.level32_flag_manualremoval_test(
# remove_dates=[
Expand All @@ -680,28 +699,28 @@ def example():
# showplot=True, verbose=True)
# fpc.level32_addflag()

fpc.level32_flag_outliers_hampel_test(window_length=48 * 9, n_sigma=5, showplot=True, verbose=True, repeat=True)
fpc.level32_addflag()
# fpc.level32_flag_outliers_hampel_test(window_length=48 * 9, n_sigma=5, showplot=True, verbose=True, repeat=True)
# fpc.level32_addflag()

fpc.level32_flag_outliers_hampel_dtnt_test(window_length=48 * 9, n_sigma_dt=7, n_sigma_nt=5,
showplot=True, verbose=True, repeat=True)
fpc.level32_addflag()
# fpc.level32_flag_outliers_hampel_dtnt_test(window_length=48 * 9, n_sigma_dt=7, n_sigma_nt=5,
# showplot=True, verbose=True, repeat=True)
# fpc.level32_addflag()

fpc.level32_flag_outliers_zscore_rolling_test(winsize=48 * 9, thres_zscore=5, showplot=True, verbose=True,
repeat=True)
fpc.level32_addflag()
# fpc.level32_flag_outliers_zscore_rolling_test(winsize=48 * 9, thres_zscore=5, showplot=True, verbose=True,
# repeat=True)
# fpc.level32_addflag()

fpc.level32_flag_outliers_zscore_dtnt_test(thres_zscore=4, showplot=True, verbose=True, repeat=True)
fpc.level32_addflag()
# fpc.level32_flag_outliers_zscore_dtnt_test(thres_zscore=4, showplot=True, verbose=True, repeat=True)
# fpc.level32_addflag()
# fpc.level32.results # Stores Level-3.2 flags up to this point

fpc.level32_flag_outliers_localsd_test(n_sd=3, winsize=480, showplot=True, verbose=True, repeat=True)
fpc.level32_addflag()
# fpc.level32_flag_outliers_localsd_test(n_sd=3, winsize=480, showplot=True, verbose=True, repeat=True)
# fpc.level32_addflag()
# fpc.level32.results # Stores Level-3.2 flags up to this point

fpc.level32_flag_outliers_increments_zcore_test(thres_zscore=4, showplot=True, verbose=True, repeat=True)
fpc.level32_addflag()
fpc.level32.showplot_cleaned()
# fpc.level32_flag_outliers_increments_zcore_test(thres_zscore=4, showplot=True, verbose=True, repeat=True)
# fpc.level32_addflag()
# fpc.level32.showplot_cleaned()
# fpc.level32.results # Stores Level-3.2 flags up to this point

# fpc.level32_flag_outliers_lof_dtnt_test(n_neighbors=20, contamination=None, showplot=True,
Expand All @@ -720,24 +739,24 @@ def example():
# fpc.level32_addflag()
# fpc.level32.results # Stores Level-3.2 flags up to this point

fpc.level32_flag_outliers_abslim_dtnt_test(daytime_minmax=[-50, 50], nighttime_minmax=[-10, 50], showplot=True,
verbose=True)
fpc.level32_addflag()
# fpc.level32_flag_outliers_abslim_dtnt_test(daytime_minmax=[-50, 50], nighttime_minmax=[-10, 50], showplot=True,
# verbose=True)
# fpc.level32_addflag()
# fpc.level32.results # Stores Level-3.2 flags up to this point

fpc.level32_flag_outliers_trim_low_test(trim_nighttime=True, lower_limit=-20, showplot=True, verbose=True)
fpc.level32_addflag()
# fpc.level32_flag_outliers_trim_low_test(trim_nighttime=True, lower_limit=-20, showplot=True, verbose=True)
# fpc.level32_addflag()

fpc.finalize_level32(nighttime_threshold=50, daytime_accept_qcf_below=2, nighttimetime_accept_qcf_below=2)
# fpc.finalize_level32(nighttime_threshold=50, daytime_accept_qcf_below=2, nighttimetime_accept_qcf_below=2)

# fpc.filteredseries
# fpc.level32.flags
fpc.level32_qcf.showplot_qcf_heatmaps()
# fpc.level32_qcf.showplot_qcf_timeseries()
# fpc.level32_qcf.report_qcf_flags()
fpc.level32_qcf.report_qcf_evolution()
# fpc.level32_qcf.report_qcf_series()
# fpc.levelidstr
# # fpc.filteredseries
# # fpc.level32.flags
# fpc.level32_qcf.showplot_qcf_heatmaps()
# # fpc.level32_qcf.showplot_qcf_timeseries()
# # fpc.level32_qcf.report_qcf_flags()
# fpc.level32_qcf.report_qcf_evolution()
# # fpc.level32_qcf.report_qcf_series()
# # fpc.levelidstr

# fpc.filteredseries_level2_qcf
# fpc.filteredseries_level31_qcf
Expand Down
7 changes: 5 additions & 2 deletions diive/pkgs/fluxprocessingchain/level2_qualityflags.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ def results(self) -> DataFrame:
raise Exception('Results for flux flags are empty')
return self._results

def angle_of_attack_test(self):
def angle_of_attack_test(
self,
application_dates: list or None = None
):
flag = flag_angle_of_attack_eddypro_test(df=self.dfin, flux=self.fluxcol,
idstr=self.idstr)
idstr=self.idstr, application_dates=application_dates)
self._results[flag.name] = flag

def steadiness_of_horizontal_wind(self):
Expand Down
18 changes: 13 additions & 5 deletions diive/pkgs/qaqc/eddyproflags.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""
Quality flags that depend on EddyPro output files.
"""

from typing import Literal

import numpy as np
import pandas as pd
from pandas import DataFrame, Series

from diive.core.funcs.funcs import validate_id_string
from diive.pkgs.qaqc.flags import restrict_application


def flag_signal_strength_eddypro_test(df: DataFrame,
Expand Down Expand Up @@ -98,7 +97,8 @@ def flag_steadiness_horizontal_wind_eddypro_test(df: DataFrame,

def flag_angle_of_attack_eddypro_test(df: DataFrame,
flux: str,
idstr: str = None) -> Series:
idstr: str = None,
application_dates: list or None = None) -> Series:
"""Flag from EddyPro output files is an integer and looks like this, e.g.: 81.
The integer contains angle-of-attack test results for the sonic anemometer.
Expand All @@ -123,13 +123,21 @@ def flag_angle_of_attack_eddypro_test(df: DataFrame,
aoa_flag = aoa_flag.astype(float)
aoa_flag = aoa_flag.replace(9, np.nan)
aoa_flag = aoa_flag.replace(1, 2) # Hard flag 1 corresponds to bad value
aoa_flag.name = flagname_out

# Apply flag only during certain time periods
if application_dates:
aoa_flag = restrict_application(flag=aoa_flag,
flagname="ANGLE OF ATTACK TEST",
application_dates=application_dates,
verbose=True,
fill_value=np.nan)

print(f"ANGLE OF ATTACK TEST: Generated new flag variable {flagname_out}, "
f"values taken from output variable {aoa_flag.name}, with "
f"flag 0 (good values) where test passed, "
f"flag 2 (bad values) where test failed ...")

aoa_flag.name = flagname_out
return aoa_flag


Expand Down
Loading

0 comments on commit 2b81037

Please sign in to comment.