From 8b9a7c33eddcbd965a0f4306fe804bfeb7bc7ad3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 20 Jun 2025 10:43:20 -0700 Subject: [PATCH 1/2] Ensure that one value of depth_threshold is used and set the default to 10. --- src/data/resample.py | 24 +++++++++++++++++++----- src/data/test_process_dorado.py | 4 ++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/data/resample.py b/src/data/resample.py index 92796dee..4a8d6424 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -35,6 +35,7 @@ PLOT_SECONDS = 300 AUVCTD_OPENDAP_BASE = "http://dods.mbari.org/opendap/data/auvctd" FLASH_THRESHOLD = 1.0e11 +DEPTH_THRESHOLD = 10.0 # meters class InvalidAlignFile(Exception): @@ -431,7 +432,7 @@ def select_nighttime_bl_raw( self.logger.info("No sunset or sunrise found during this mission.") return nighttime_bl_raw, sunsets, sunrises - def add_profile(self, depth_threshold: float = 15) -> None: + def add_profile(self, depth_threshold: float) -> None: # Find depth vertices value using scipy's find_peaks algorithm options = {"prominence": 10, "width": 30} peaks_pos, _ = signal.find_peaks(self.resampled_nc["depth"], **options) @@ -749,11 +750,11 @@ def correct_biolume_proxies( # noqa: C901, PLR0912, PLR0913, PLR0915 biolume_fluo: pd.Series, # from add_biolume_proxies biolume_sunsets: list[datetime], # from add_biolume_proxies biolume_sunrises: list[datetime], # from add_biolume_proxies + depth_threshold: float, adinos_threshold: float = 0.1, correction_threshold: int = 3, fluo_bl_threshold: float = 0.4, corr_type: str = "pearson", # "spearman" or "pearson" - depth_threshold: float = 15.0, minutes_from_surface_threshold: int = 5, ) -> None: variables = [ @@ -1002,6 +1003,7 @@ def resample_variable( # noqa: PLR0913 mission_start: pd.Timestamp, mission_end: pd.Timestamp, instrs_to_pad: dict[str, timedelta], + depth_threshold: float, ) -> None: timevar = f"{instr}_{TIME}" if instr == "biolume" and variable == "biolume_raw": @@ -1013,7 +1015,12 @@ def resample_variable( # noqa: PLR0913 proxy_cal_factor=proxy_cal_factor, proxy_ratio_adinos=proxy_ratio_adinos, ) - self.correct_biolume_proxies(biolume_fluo, biolume_sunsets, biolume_sunrises) + self.correct_biolume_proxies( + biolume_fluo, + biolume_sunsets, + biolume_sunrises, + depth_threshold, + ) else: self.df_o[variable] = self.ds[variable].to_pandas() self.df_o[f"{variable}_mf"] = ( @@ -1164,13 +1171,18 @@ def get_mission_start_end( ) return mission_start, mission_end, instrs_to_pad - def resample_mission( # noqa: C901, PLR0912, PLR0915 + def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 self, nc_file: str, # align.nc file mf_width: int = MF_WIDTH, freq: str = FREQ, plot_seconds: float = PLOT_SECONDS, + depth_threshold: float = DEPTH_THRESHOLD, ) -> None: + # Change depth_threshold here should a particular mission require it, e.g.: + # if "2023.192.01" in nc_file: ... + self.logger.info("Using depth_threshold = %.2f m", depth_threshold) + pd.options.plotting.backend = "matplotlib" self.ds = xr.open_dataset(nc_file) mission_start, mission_end, instrs_to_pad = self.get_mission_start_end(nc_file) @@ -1202,7 +1214,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915 self.save_coordinates(instr, mf_width, freq, aggregator) if self.args.plot: self.plot_coordinates(instr, freq, plot_seconds) - self.add_profile() + self.add_profile(depth_threshold=depth_threshold) if instr != last_instr: # Start with new dataframes for each instrument self.df_o = pd.DataFrame() @@ -1219,6 +1231,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915 mission_start, mission_end, instrs_to_pad, + depth_threshold, ) for var in self.df_r: if var not in variables: @@ -1243,6 +1256,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915 mission_start, mission_end, instrs_to_pad, + depth_threshold, ) self.df_r[variable].index.rename("time", inplace=True) # noqa: PD002 self.resampled_nc[variable] = self.df_r[variable].to_xarray() diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 875a1a0f..8803a5d9 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -51,8 +51,8 @@ def test_process_dorado(complete_dorado_processing): if check_md5: # Check that the MD5 hash has not changed EXPECTED_MD5_GITHUB = "11f9a455dfae3bad24aa485181ef5384" - EXPECTED_MD5_ACT = "ec95eabdc1450aefee4667808bc46d92" - EXPECTED_MD5_LOCAL = "d9754a20d1c8ac6ddbb1a62d75aa507e" + EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" + EXPECTED_MD5_LOCAL = "6ecb2229b00835055619e982fe9d5023" if str(proc.args.base_path).startswith("/home/runner"): # The MD5 hash is different in GitHub Actions, maybe due to different metadata assert hashlib.md5(open(nc_file, "rb").read()).hexdigest() == EXPECTED_MD5_GITHUB # noqa: PTH123, S101, S324, SIM115 From 8c09a54a4a81eede4f48859ace94ca4d4c539212 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 20 Jun 2025 10:47:59 -0700 Subject: [PATCH 2/2] Update the EXPECTED_MD5_GITHUB value. We expect this change as the depth_threshold change from 15 to 10 causes changes in the netCDF data. --- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 8803a5d9..3eb1033c 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "11f9a455dfae3bad24aa485181ef5384" + EXPECTED_MD5_GITHUB = "6550bb8ed5919f21413f30dfffdcf116" EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" EXPECTED_MD5_LOCAL = "6ecb2229b00835055619e982fe9d5023" if str(proc.args.base_path).startswith("/home/runner"):