Skip to content

Commit

Permalink
Merge pull request #69 from holukas/v0.71.1
Browse files Browse the repository at this point in the history
V0.71.1
  • Loading branch information
holukas authored Mar 15, 2024
2 parents da4752f + 5f198fe commit e303170
Show file tree
Hide file tree
Showing 55 changed files with 1,191 additions and 110 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@

![DIIVE](images/logo_diive1_256px.png)

## v0.71.1 | 15 Mar 2024

### Bugfixes & changes

- Fixed: Replaced all references to old filetype names that used underscores with their respective new filetype names,
  e.g. all occurrences of `EDDYPRO_FLUXNET_30MIN` were replaced with the new name `EDDYPRO-FLUXNET-30MIN`.
- Environment: Python 3.11 is now allowed in `pyproject.toml`: `python = ">=3.9,<3.12"`
- Environment: Removed `fitter` library from dependencies, was not used.
- Docs: Testing documentation generation using [Sphinx](https://www.sphinx-doc.org/en/master/), although it looks very
rough at the moment.

## v0.71.0 | 14 Mar 2024

### High-resolution update
Expand Down
4 changes: 2 additions & 2 deletions diive/configs/exampledata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def load_exampledata_parquet() -> DataFrame:

def load_exampledata_DIIVE_CSV_30MIN():
filepath = Path(DIR_PATH) / 'exampledata_CH-DAV_FP2022.5_2022.07_ID20230206154316_30MIN.diive.csv'
loaddatafile = ReadFileType(filetype='DIIVE_CSV_30MIN',
loaddatafile = ReadFileType(filetype='DIIVE-CSV-30MIN',
filepath=filepath,
data_nrows=None)
data_df, metadata_df = loaddatafile.get_filedata()
Expand All @@ -28,7 +28,7 @@ def load_exampledata_DIIVE_CSV_30MIN():
def load_exampledata_eddypro_fluxnet_CSV_30MIN():
filepath = Path(
DIR_PATH) / 'exampledata_CH-AWS_2022.07_FR-20220127-164245_eddypro_fluxnet_2022-01-28T112538_adv.csv'
loaddatafile = ReadFileType(filetype='EDDYPRO_FLUXNET_30MIN',
loaddatafile = ReadFileType(filetype='EDDYPRO-FLUXNET-30MIN',
filepath=filepath,
data_nrows=None)
data_df, metadata_df = loaddatafile.get_filedata()
Expand Down
10 changes: 5 additions & 5 deletions diive/core/io/filereader.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,11 +564,11 @@ def _parse_file(self, headercols_list):

def example_icosfile():
FILE = r"L:\Sync\luhk_work\20 - CODING\21 - DIIVE\diive\diive\configs\exampledata\CH-Dav_BM_20230328_L02_F03.zip"
rft = ReadFileType(filepath=FILE, filetype='ICOS_H2R_CSVZIP_10S', output_middle_timestamp=True)
rft = ReadFileType(filepath=FILE, filetype='ICOS-H2R-CSVZIP-10S', output_middle_timestamp=True)
df, meta = rft.get_filedata()

# # Read all original data files to dataframe, convert timestamp index to show TIMESTAMP_MIDDLE
# orig = MultiDataFileReader(filepaths=origfiles, filetype='ICOS_H2R_CSVZIP_10S', output_middle_timestamp=True)
# orig = MultiDataFileReader(filepaths=origfiles, filetype='ICOS-H2R-CSVZIP-10S', output_middle_timestamp=True)
# origdf = orig.data_df
# origmeta = orig.metadata_df

Expand All @@ -586,7 +586,7 @@ def example_ep_fluxnet():
# and fp.stem.endswith("_adv")]
print(filepaths)

loaddatafile = MultiDataFileReader(filetype='EDDYPRO_FLUXNET_30MIN', filepaths=filepaths)
loaddatafile = MultiDataFileReader(filetype='EDDYPRO-FLUXNET-30MIN', filepaths=filepaths)
df = loaddatafile.data_df

# # Store original column order
Expand Down Expand Up @@ -615,9 +615,9 @@ def example_toa5():
corrected = r"C:\Users\holukas\Downloads\corrected_files\c-CH-OE2_iDL_BOX1_0_1_TBL1_20220629-1714.dat"
uncorrected = r"C:\Users\holukas\Downloads\corrected_files\CH-OE2_iDL_BOX1_0_1_TBL1_20220629-1714.dat"

corr_df, corr_meta = ReadFileType(filepath=corrected, filetype='TOA5_DAT_1MIN',
corr_df, corr_meta = ReadFileType(filepath=corrected, filetype='TOA5-DAT-1MIN',
output_middle_timestamp=True).get_filedata()
uncorr_df, uncorr_meta = ReadFileType(filepath=uncorrected, filetype='TOA5_DAT_1MIN',
uncorr_df, uncorr_meta = ReadFileType(filepath=uncorrected, filetype='TOA5-DAT-1MIN',
output_middle_timestamp=True).get_filedata()

corr_descr = corr_df.describe()
Expand Down
2 changes: 1 addition & 1 deletion diive/core/plotting/cumulative.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def example():
# # Test data
# from diive.core.io.filereader import ReadFileType
# loaddatafile = ReadFileType(
# filetype='DIIVE_CSV_30MIN',
# filetype='DIIVE-CSV-30MIN',
# filepath=r"M:\Downloads\_temp\CH_LAE_FP2021_2004-2020_ID20210607205711.diive.csv",
# # filepath=r"F:\Dropbox\luhk_work\_current\fp2022\7-14__IRGA627572__addingQCF0\CH-DAV_FP2022.1_1997-2022.08_ID20220826234456_30MIN.diive.csv",
# data_nrows=None)
Expand Down
2 changes: 1 addition & 1 deletion diive/core/plotting/heatmap_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def example():
# Load file
from diive.core.io.filereader import ReadFileType
loaddatafile = ReadFileType(
filetype='GENERIC-CSV_HEADER-1ROW_TS-END-FULL_1MIN',
filetype='GENERIC-CSV-HEADER-1ROW-TS-END-FULL-1MIN',
# filepath=r"M:\Downloads\_temp\orig.csv",
filepath=r"M:\Downloads\_temp\db.csv",
data_nrows=None)
Expand Down
2 changes: 1 addition & 1 deletion diive/core/plotting/scatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def example():
# filepaths = search_files(FOLDER, "*.csv")
# filepaths = [fp for fp in filepaths if "_fluxnet_" in fp.stem and fp.stem.endswith("_adv")]
# print(filepaths)
# fr = MultiDataFileReader(filetype='EDDYPRO_FLUXNET_30MIN', filepaths=filepaths)
# fr = MultiDataFileReader(filetype='EDDYPRO-FLUXNET-30MIN', filepaths=filepaths)
# df = fr.data_df
# from diive.core.io.files import save_parquet
# save_parquet(outpath=FOLDER, filename="data", data=df)
Expand Down
2 changes: 1 addition & 1 deletion diive/pkgs/analyses/__indev__aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def example():
SOURCEFILE = r"F:\Downloads\_temp2\wsl_19256_data_10min.csv"

# Read data from precip files to dataframe
rft = ReadFileType(filepath=SOURCEFILE, filetype='TOA5_CSV_10MIN', output_middle_timestamp=True)
rft = ReadFileType(filepath=SOURCEFILE, filetype='TOA5-CSV-10MIN', output_middle_timestamp=True)
df = rft.data_df

d = df[['ghi_Avg']].copy()
Expand Down
2 changes: 1 addition & 1 deletion diive/pkgs/analyses/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def _binning_method(self):
def example():
# # from diive.core.io.filereader import ReadFileType
# # SOURCE = r"F:\01-NEW\FF202303\FRU\Level-0_OPENLAG_results_2005-2022\OUT_DIIVE-20230410-020904\winddir_Dataset_DIIVE-20230410-020904_Original-30T.diive.csv"
# # loaddatafile = ReadFileType(filetype='DIIVE_CSV_30MIN', filepath=SOURCE, data_nrows=None)
# # loaddatafile = ReadFileType(filetype='DIIVE-CSV-30MIN', filepath=SOURCE, data_nrows=None)
# # data_df, metadata_df = loaddatafile.get_filedata()
#
# # from diive.core.io.files import load_pickle, save_as_pickle
Expand Down
2 changes: 1 addition & 1 deletion diive/pkgs/corrections/winddiroffset.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def example():
# # Load example data
# from diive.core.io.filereader import ReadFileType
# SOURCE = r"F:\01-NEW\FF202303\FRU\Level-0_OPENLAG_results_2005-2022\OUT_DIIVE-20230410-020904\winddir_Dataset_DIIVE-20230410-020904_Original-30T.diive.csv"
# loaddatafile = ReadFileType(filetype='DIIVE_CSV_30MIN', filepath=SOURCE, data_nrows=None)
# loaddatafile = ReadFileType(filetype='DIIVE-CSV-30MIN', filepath=SOURCE, data_nrows=None)
# data_df, metadata_df = loaddatafile.get_filedata()

# # Save data as pickle for faster loading
Expand Down
6 changes: 3 additions & 3 deletions diive/pkgs/flux/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@


def detect_basevar(fluxcol: str,
filetype: Literal['EDDYPRO_FLUXNET_30MIN', 'EDDYPRO_FULL_OUTPUT_30MIN']) -> str:
filetype: Literal['EDDYPRO-FLUXNET-30MIN', 'EDDYPRO-FULL-OUTPUT-30MIN']) -> str:
"""Detect name of base variable that was used to calculate
the respective flux."""
if filetype == 'EDDYPRO_FLUXNET_30MIN':
if filetype == 'EDDYPRO-FLUXNET-30MIN':
basevar = basevars_fluxnetfile[fluxcol]
elif filetype == 'EDDYPRO_FULL_OUTPUT_30MIN':
elif filetype == 'EDDYPRO-FULL-OUTPUT-30MIN':
basevar = basevars_fulloutputfile[fluxcol]
else:
raise Exception(f"(!) Filetype {filetype} is not defined. No basevar could be detected for {fluxcol}.")
Expand Down
2 changes: 1 addition & 1 deletion diive/pkgs/flux/ustar_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def example():
# filepaths = search_files(FOLDER, "*.csv")
# filepaths = [fp for fp in filepaths if "_fluxnet_" in fp.stem and fp.stem.endswith("_adv")]
# print(filepaths)
# fr = MultiDataFileReader(filetype='EDDYPRO_FLUXNET_30MIN', filepaths=filepaths)
# fr = MultiDataFileReader(filetype='EDDYPRO-FLUXNET-30MIN', filepaths=filepaths)
# df = fr.data_df
# from diive.core.io.files import save_parquet
# save_parquet(outpath=FOLDER, filename="data", data=df)
Expand Down
12 changes: 6 additions & 6 deletions diive/pkgs/fluxprocessingchain/fluxprocessingchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class FluxProcessingChain:
def __init__(
self,
maindf: DataFrame,
filetype: Literal['EDDYPRO_FLUXNET_30MIN', 'EDDYPRO_FULL_OUTPUT_30MIN'],
filetype: Literal['EDDYPRO-FLUXNET-30MIN', 'EDDYPRO-FULL-OUTPUT-30MIN'],
fluxcol: str,
site_lat: float,
site_lon: float,
Expand Down Expand Up @@ -397,7 +397,7 @@ class LoadEddyProOutputFiles:
def __init__(
self,
sourcedir: str or list,
filetype: Literal['EDDYPRO_FLUXNET_30MIN', 'EDDYPRO_FULL_OUTPUT_30MIN']
filetype: Literal['EDDYPRO-FLUXNET-30MIN', 'EDDYPRO-FULL-OUTPUT-30MIN']
):
self.sourcedir = sourcedir
self.filetype = filetype
Expand Down Expand Up @@ -443,9 +443,9 @@ def loadfiles(self):
self._metadata = loaddatafile.metadata_df

def _init_filetype(self):
if self.filetype == 'EDDYPRO_FLUXNET_30MIN':
if self.filetype == 'EDDYPRO-FLUXNET-30MIN':
fileids = ['eddypro_', '_fluxnet_']
elif self.filetype == 'EDDYPRO_FULL_OUTPUT_30MIN':
elif self.filetype == 'EDDYPRO-FULL-OUTPUT-30MIN':
fileids = ['eddypro_', '_full_output_']
else:
raise Exception("Filetype is unknown.")
Expand Down Expand Up @@ -553,7 +553,7 @@ def example_quick():
sourcedirs=[r'L:\Sync\luhk_work\CURRENT\fru\Level-1_results_fluxnet_2022'],
site_lat=47.115833,
site_lon=8.537778,
filetype='EDDYPRO_FLUXNET_30MIN',
filetype='EDDYPRO-FLUXNET-30MIN',
utc_offset=1,
nighttime_threshold=50,
daytime_accept_qcf_below=2,
Expand All @@ -567,7 +567,7 @@ def example():
r'L:\Sync\luhk_work\CURRENT\fru\Level-1_results_fluxnet_2022'] # Folders where the EddyPro output files are located
SITE_LAT = 47.115833 # Latitude of site
SITE_LON = 8.537778 # Longitude of site
FILETYPE = 'EDDYPRO_FLUXNET_30MIN' # Filetype of EddyPro output files, can be 'EDDYPRO_FLUXNET_30MIN' or 'EDDYPRO_FULL_OUTPUT_30MIN'
FILETYPE = 'EDDYPRO-FLUXNET-30MIN' # Filetype of EddyPro output files, can be 'EDDYPRO-FLUXNET-30MIN' or 'EDDYPRO-FULL-OUTPUT-30MIN'
UTC_OFFSET = 1 # Time stamp offset in relation to UTC, e.g. 1 for UTC+01:00 (CET), important for the calculation of potential radiation for detecting daytime and nighttime
NIGHTTIME_THRESHOLD = 50 # Threshold for potential radiation in W m-2, conditions below threshold are nighttime
DAYTIME_ACCEPT_QCF_BELOW = 2
Expand Down
12 changes: 6 additions & 6 deletions diive/pkgs/fluxprocessingchain/level2_qualityflags.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self,
dfin: DataFrame,
fluxcol: str,
basevar: str,
filetype: Literal['EDDYPRO_FLUXNET_30MIN', 'EDDYPRO_FULL_OUTPUT_30MIN'],
filetype: Literal['EDDYPRO-FLUXNET-30MIN', 'EDDYPRO-FULL-OUTPUT-30MIN'],
idstr: str = None,
units: dict = None):
"""
Expand All @@ -33,14 +33,14 @@ def __init__(self,
filetype: Filetype of the input file.
basevar: Name of the variable that was used to calculate the flux, e.g. 'CO2_CONC' for CO2 flux.
units: Dictionary of columns names and their units, only needed
when *filetype='EDDYPRO_FULL_OUTPUT_30MIN'*.
when *filetype='EDDYPRO-FULL-OUTPUT-30MIN'*.
"""
self.fluxcol = fluxcol
self.dfin = dfin.copy()

if not units and filetype == 'EDDYPRO_FULL_OUTPUT_30MIN':
if not units and filetype == 'EDDYPRO-FULL-OUTPUT-30MIN':
raise Exception("ERROR: No units found. Units are needed when working "
"with filetype EDDYPRO_FULL_OUTPUT_30MIN.")
"with filetype EDDYPRO-FULL-OUTPUT-30MIN.")

self.units = units
self.idstr = validate_id_string(idstr=idstr)
Expand Down Expand Up @@ -93,9 +93,9 @@ def raw_data_screening_vm97_tests(
discont_hf=discont_hf,
discont_sf=discont_sf,
)
if self.filetype == 'EDDYPRO_FLUXNET_30MIN':
if self.filetype == 'EDDYPRO-FLUXNET-30MIN':
flags = flags_vm97_eddypro_fluxnetfile_tests(**kwargs)
elif self.filetype == 'EDDYPRO_FULL_OUTPUT_30MIN':
elif self.filetype == 'EDDYPRO-FULL-OUTPUT-30MIN':
flags = flags_vm97_eddypro_fulloutputfile_tests(**kwargs)
else:
raise Exception(f"Filetype {self.filetype.__name__} unkown.")
Expand Down
6 changes: 3 additions & 3 deletions diive/pkgs/fluxprocessingchain/level31_storagecorrection.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __init__(self,
df: DataFrame,
fluxcol: str,
basevar: str,
filetype: Literal['EDDYPRO_FLUXNET_30MIN', 'EDDYPRO_FULL_OUTPUT_30MIN'],
filetype: Literal['EDDYPRO-FLUXNET-30MIN', 'EDDYPRO-FULL-OUTPUT-30MIN'],
gapfill_storage_term: bool = False,
idstr: str = 'L3.1'):
self.df = df.copy()
Expand Down Expand Up @@ -156,7 +156,7 @@ def _detect_storage_var(self) -> tuple[str, str]:

flux_corrected_col = None

if self.filetype == 'EDDYPRO_FLUXNET_30MIN':
if self.filetype == 'EDDYPRO-FLUXNET-30MIN':
options = {
'FC': 'SC_SINGLE',
'FH2O': 'SH2O_SINGLE',
Expand All @@ -169,7 +169,7 @@ def _detect_storage_var(self) -> tuple[str, str]:
if self.fluxcol == 'FC':
flux_corrected_col = f'NEE{self.idstr}'

elif self.filetype == 'EDDYPRO_FULL_OUTPUT_30MIN':
elif self.filetype == 'EDDYPRO-FULL-OUTPUT-30MIN':
options = {
'co2_flux': 'co2_strg',
'h2o_flux': 'h2o_strg',
Expand Down
2 changes: 1 addition & 1 deletion diive/pkgs/formats/fluxnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def subset_fluxnet(self) -> DataFrame:
return self._subset_fluxnet

def mergefiles(self, limit_n_files: int = None):
self._merged_df = loadfiles(filetype='EDDYPRO_FLUXNET_30MIN',
self._merged_df = loadfiles(filetype='EDDYPRO-FLUXNET-30MIN',
sourcedir=self.sourcedir,
limit_n_files=limit_n_files,
fileext='.csv',
Expand Down
Loading

0 comments on commit e303170

Please sign in to comment.