From 9ebd42bf43c68ee7e5a955396f668c9fa7259666 Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Mon, 14 Apr 2025 19:14:11 -0700
Subject: [PATCH 1/9] draft adding proxy corrections

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 86 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/src/data/resample.py b/src/data/resample.py
index 162a5028..639e5d93 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -664,6 +664,92 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
                 f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}"
             )
 
+    def correct_biolume_proxies(
+        self,
+        adinos_threshold: float = 0.1,
+        correction_threshold: int = 3,
+        fluoBL_threshold: float = 0.25,  # use 0.45 for pearson corr
+        corr_type: str = 'spearman',  # or pearson
+        depth_threshold: float = 2.0,
+        minutes_from_surface_threshold: int = 5
+    ) -> None:
+        df = self.df_r[
+                 [
+                  'depth',
+                  'biolum_proxy_diatoms', 'biolum_proxy_adinos', 'biolum_proxy_hdinos',
+                  'fluo', 'bg_biolum'
+                 ]
+             ].copy(deep=True)
+        df['fluoBL_corr'] = \
+            ('time', np.full_like(self.resampled_nc["profile_number"], np.nan))
+
+        profile_series = self.resampled_nc['profile_number'].to_series()
+        df['profile_number'] = profile_series.reindex(self.df_r.index, method='ffill')
+        # make unique profiles across all surveys
+        profil = np.cumsum(np.abs(np.diff(df['profile_number'], prepend=0)))
+
+        # new proxies are the "N" fields
+        for new, old in zip(
+            ['diatomsN','adinosN','hdinosN'],
+            ['biolum_proxy_diatoms','biolum_proxy_adinos','biolum_proxy_hdinos']
+        ):
+            df[new] = ('time', np.full_like(df[old], np.nan))
+
+        # compute correlation per profil and then correct proxies
+        dt_5mins = np.timedelta64(timedelta(minutes=minutes_from_surface_threshold))
+        for iprofil_ in tqdm(range(1, int(np.max(profil)))):
+            iprofil = (profil == iprofil_)
+            # excludes surface, must be within 5 min of it
+            ideep = iprofil & (df.depth > depth_threshold)
+            itime = (df.index > (df.index[ideep].min() - dt_5mins)) & \
+                    (df.index < (df.index[ideep].max() + dt_5mins))
+            iprofil = iprofil & itime
+            if not np.any(iprofil):
+                #print(f'no corrections possible for {iprofil_=}')
+                continue
+            auv_profil = df.loc[iprofil]
+            if auv_profil.shape[0] > correction_threshold:
+                if np.sum(auv_profil.adinos > adinos_threshold) < correction_threshold:
+                    # no correction for low fluo & biolum values
+                    fluoBL_corr = 1.0
+                else:
+                    # correlation between fluo and bg_biolum computed on high
+                    # adino values for each profile
+                    idepth = auv_profil.depth < auv_profil.depth[
+                                                    auv_profil.adinos > adinos_threshold
+                                                ].max()
+                    # pandas' corr ignores NaN
+                    auv_profil_idepth = auv_profil[['fluo', 'bg_biolum']].loc[idepth]
+                    fluoBL_corr = auv_profil_idepth.fluo.corr(auv_profil_idepth.bg_biolum,
+                                                              method=corr_type)
+
+                # save correlation
+                df.fluoBL_corr[iprofil] = fluoBL_corr
+
+                # scale between 0 and 1 first
+                fluoBL_correctionfactor = (fluoBL_corr + 1.0) / 2.0
+
+                # then scale between fluoBL_threshold and 1
+                fluoBL_correctionfactor = \
+                    fluoBL_correctionfactor * (1.0 - fluoBL_threshold) + fluoBL_threshold
+
+                # can happen if fluoBL_threshold is negative
+                fluoBL_correctionfactor = max(fluoBL_correctionfactor, 0.0)
+
+                df.adinosN[iprofil] = df.adinos[iprofil] * fluoBL_correctionfactor
+
+                # preserving adinos+diatoms
+                df.diatomsN[iprofil] = \
+                    df.adinos[iprofil] + df.diatoms[iprofil] - df.adinosN[iprofil]
+
+                # preserving adinos+hdinos
+                df.hdinosN[iprofil] = \
+                    df.adinos[iprofil] + df.hdinos[iprofil] - df.adinosN[iprofil]
+
+                self.df_r.biolum_proxy_adinos[iprofil] = df.adinosN[iprofil]
+                self.df_r.biolum_proxy_diatoms[iprofil] = df.diatomsN[iprofil]
+                self.df_r.biolum_proxy_hdinos[iprofil] = df.hdinosN[iprofil]
+
     def resample_variable(  # noqa: PLR0913
         self,
         instr: str,

From 7e1453f47be56b2d869b2df40ce26e4ff32ceed3 Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Mon, 14 Apr 2025 19:24:32 -0700
Subject: [PATCH 2/9] linters

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 60 ++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index 639e5d93..b85d6165 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -664,50 +664,50 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
                 f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}"
             )
 
-    def correct_biolume_proxies(
+    def correct_biolume_proxies(  # noqa: PLR0913
         self,
         adinos_threshold: float = 0.1,
         correction_threshold: int = 3,
-        fluoBL_threshold: float = 0.25,  # use 0.45 for pearson corr
+        fluo_bl_threshold: float = 0.25,  # use 0.45 for pearson corr
         corr_type: str = 'spearman',  # or pearson
         depth_threshold: float = 2.0,
         minutes_from_surface_threshold: int = 5
     ) -> None:
-        df = self.df_r[
-                 [
-                  'depth',
-                  'biolum_proxy_diatoms', 'biolum_proxy_adinos', 'biolum_proxy_hdinos',
-                  'fluo', 'bg_biolum'
-                 ]
-             ].copy(deep=True)
-        df['fluoBL_corr'] = \
+        df_p = self.df_r[
+                   [
+                    'depth',
+                    'biolum_proxy_diatoms', 'biolum_proxy_adinos', 'biolum_proxy_hdinos',
+                    'fluo', 'bg_biolum'
+                   ]
+               ].copy(deep=True)
+        df_p['fluoBL_corr'] = \
             ('time', np.full_like(self.resampled_nc["profile_number"], np.nan))
 
         profile_series = self.resampled_nc['profile_number'].to_series()
-        df['profile_number'] = profile_series.reindex(self.df_r.index, method='ffill')
+        df_p['profile_number'] = profile_series.reindex(df_p.index, method='ffill')
         # make unique profiles across all surveys
-        profil = np.cumsum(np.abs(np.diff(df['profile_number'], prepend=0)))
+        profil = np.cumsum(np.abs(np.diff(df_p['profile_number'], prepend=0)))
 
         # new proxies are the "N" fields
         for new, old in zip(
             ['diatomsN','adinosN','hdinosN'],
             ['biolum_proxy_diatoms','biolum_proxy_adinos','biolum_proxy_hdinos']
         ):
-            df[new] = ('time', np.full_like(df[old], np.nan))
+            df_p[new] = ('time', np.full_like(df_p[old], np.nan))
 
         # compute correlation per profil and then correct proxies
         dt_5mins = np.timedelta64(timedelta(minutes=minutes_from_surface_threshold))
-        for iprofil_ in tqdm(range(1, int(np.max(profil)))):
+        for iprofil_ in range(1, int(np.max(profil))):
             iprofil = (profil == iprofil_)
             # excludes surface, must be within 5 min of it
-            ideep = iprofil & (df.depth > depth_threshold)
-            itime = (df.index > (df.index[ideep].min() - dt_5mins)) & \
-                    (df.index < (df.index[ideep].max() + dt_5mins))
+            ideep = iprofil & (df_p.depth > depth_threshold)
+            itime = (df_p.index > (df_p.index[ideep].min() - dt_5mins)) & \
+                    (df_p.index < (df_p.index[ideep].max() + dt_5mins))
             iprofil = iprofil & itime
             if not np.any(iprofil):
                 #print(f'no corrections possible for {iprofil_=}')
                 continue
-            auv_profil = df.loc[iprofil]
+            auv_profil = df_p.loc[iprofil]
             if auv_profil.shape[0] > correction_threshold:
                 if np.sum(auv_profil.adinos > adinos_threshold) < correction_threshold:
                     # no correction for low fluo & biolum values
@@ -724,31 +724,31 @@ def correct_biolume_proxies(
                                                               method=corr_type)
 
                 # save correlation
-                df.fluoBL_corr[iprofil] = fluoBL_corr
+                df_p.fluoBL_corr[iprofil] = fluoBL_corr
 
                 # scale between 0 and 1 first
                 fluoBL_correctionfactor = (fluoBL_corr + 1.0) / 2.0
 
-                # then scale between fluoBL_threshold and 1
+                # then scale between fluo_bl_threshold and 1
                 fluoBL_correctionfactor = \
-                    fluoBL_correctionfactor * (1.0 - fluoBL_threshold) + fluoBL_threshold
+                    fluoBL_correctionfactor * (1.0 - fluo_bl_threshold) + fluo_bl_threshold
 
-                # can happen if fluoBL_threshold is negative
+                # can happen if fluo_bl_threshold is negative
                 fluoBL_correctionfactor = max(fluoBL_correctionfactor, 0.0)
 
-                df.adinosN[iprofil] = df.adinos[iprofil] * fluoBL_correctionfactor
+                df_p.adinosN[iprofil] = df_p.adinos[iprofil] * fluoBL_correctionfactor
 
                 # preserving adinos+diatoms
-                df.diatomsN[iprofil] = \
-                    df.adinos[iprofil] + df.diatoms[iprofil] - df.adinosN[iprofil]
+                df_p.diatomsN[iprofil] = \
+                    df_p.adinos[iprofil] + df_p.diatoms[iprofil] - df_p.adinosN[iprofil]
 
                 # preserving adinos+hdinos
-                df.hdinosN[iprofil] = \
-                    df.adinos[iprofil] + df.hdinos[iprofil] - df.adinosN[iprofil]
+                df_p.hdinosN[iprofil] = \
+                    df_p.adinos[iprofil] + df_p.hdinos[iprofil] - df_p.adinosN[iprofil]
 
-                self.df_r.biolum_proxy_adinos[iprofil] = df.adinosN[iprofil]
-                self.df_r.biolum_proxy_diatoms[iprofil] = df.diatomsN[iprofil]
-                self.df_r.biolum_proxy_hdinos[iprofil] = df.hdinosN[iprofil]
+                self.df_r.biolum_proxy_adinos[iprofil] = df_p.adinosN[iprofil]
+                self.df_r.biolum_proxy_diatoms[iprofil] = df_p.diatomsN[iprofil]
+                self.df_r.biolum_proxy_hdinos[iprofil] = df_p.hdinosN[iprofil]
 
     def resample_variable(  # noqa: PLR0913
         self,

From 0463f8064e9c368a3e4ca9f3c818f8559ddc1473 Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Mon, 14 Apr 2025 19:43:57 -0700
Subject: [PATCH 3/9] ruff linter

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 69 +++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 30 deletions(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index b85d6165..822145d5 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -669,43 +669,47 @@ def correct_biolume_proxies(  # noqa: PLR0913
         adinos_threshold: float = 0.1,
         correction_threshold: int = 3,
         fluo_bl_threshold: float = 0.25,  # use 0.45 for pearson corr
-        corr_type: str = 'spearman',  # or pearson
+        corr_type: str = "spearman",  # or pearson
         depth_threshold: float = 2.0,
-        minutes_from_surface_threshold: int = 5
+        minutes_from_surface_threshold: int = 5,
     ) -> None:
         df_p = self.df_r[
-                   [
-                    'depth',
-                    'biolum_proxy_diatoms', 'biolum_proxy_adinos', 'biolum_proxy_hdinos',
-                    'fluo', 'bg_biolum'
-                   ]
-               ].copy(deep=True)
-        df_p['fluoBL_corr'] = \
-            ('time', np.full_like(self.resampled_nc["profile_number"], np.nan))
-
-        profile_series = self.resampled_nc['profile_number'].to_series()
-        df_p['profile_number'] = profile_series.reindex(df_p.index, method='ffill')
+            [
+                "depth",
+                "biolum_proxy_diatoms",
+                "biolum_proxy_adinos",
+                "biolum_proxy_hdinos",
+                "fluo",
+                "bg_biolum",
+            ]
+        ].copy(deep=True)
+        df_p["fluoBL_corr"] = ("time", np.full_like(self.resampled_nc["profile_number"], np.nan))
+
+        profile_series = self.resampled_nc["profile_number"].to_series()
+        df_p["profile_number"] = profile_series.reindex(df_p.index, method="ffill")
         # make unique profiles across all surveys
-        profil = np.cumsum(np.abs(np.diff(df_p['profile_number'], prepend=0)))
+        profil = np.cumsum(np.abs(np.diff(df_p["profile_number"], prepend=0)))
 
         # new proxies are the "N" fields
         for new, old in zip(
-            ['diatomsN','adinosN','hdinosN'],
-            ['biolum_proxy_diatoms','biolum_proxy_adinos','biolum_proxy_hdinos']
+            ["diatomsN", "adinosN", "hdinosN"],
+            ["biolum_proxy_diatoms", "biolum_proxy_adinos", "biolum_proxy_hdinos"],
+            strict=False,
         ):
-            df_p[new] = ('time', np.full_like(df_p[old], np.nan))
+            df_p[new] = ("time", np.full_like(df_p[old], np.nan))
 
         # compute correlation per profil and then correct proxies
         dt_5mins = np.timedelta64(timedelta(minutes=minutes_from_surface_threshold))
         for iprofil_ in range(1, int(np.max(profil))):
-            iprofil = (profil == iprofil_)
+            iprofil = profil == iprofil_
             # excludes surface, must be within 5 min of it
             ideep = iprofil & (df_p.depth > depth_threshold)
-            itime = (df_p.index > (df_p.index[ideep].min() - dt_5mins)) & \
-                    (df_p.index < (df_p.index[ideep].max() + dt_5mins))
+            itime = (df_p.index > (df_p.index[ideep].min() - dt_5mins)) & (
+                df_p.index < (df_p.index[ideep].max() + dt_5mins)
+            )
             iprofil = iprofil & itime
             if not np.any(iprofil):
-                #print(f'no corrections possible for {iprofil_=}')
+                # print(f'no corrections possible for {iprofil_=}')
                 continue
             auv_profil = df_p.loc[iprofil]
             if auv_profil.shape[0] > correction_threshold:
@@ -715,13 +719,15 @@ def correct_biolume_proxies(  # noqa: PLR0913
                 else:
                     # correlation between fluo and bg_biolum computed on high
                     # adino values for each profile
-                    idepth = auv_profil.depth < auv_profil.depth[
-                                                    auv_profil.adinos > adinos_threshold
-                                                ].max()
+                    idepth = (
+                        auv_profil.depth
+                        < auv_profil.depth[auv_profil.adinos > adinos_threshold].max()
+                    )
                     # pandas' corr ignores NaN
-                    auv_profil_idepth = auv_profil[['fluo', 'bg_biolum']].loc[idepth]
-                    fluoBL_corr = auv_profil_idepth.fluo.corr(auv_profil_idepth.bg_biolum,
-                                                              method=corr_type)
+                    auv_profil_idepth = auv_profil[["fluo", "bg_biolum"]].loc[idepth]
+                    fluoBL_corr = auv_profil_idepth.fluo.corr(
+                        auv_profil_idepth.bg_biolum, method=corr_type
+                    )
 
                 # save correlation
                 df_p.fluoBL_corr[iprofil] = fluoBL_corr
@@ -730,8 +736,9 @@ def correct_biolume_proxies(  # noqa: PLR0913
                 fluoBL_correctionfactor = (fluoBL_corr + 1.0) / 2.0
 
                 # then scale between fluo_bl_threshold and 1
-                fluoBL_correctionfactor = \
+                fluoBL_correctionfactor = (
                     fluoBL_correctionfactor * (1.0 - fluo_bl_threshold) + fluo_bl_threshold
+                )
 
                 # can happen if fluo_bl_threshold is negative
                 fluoBL_correctionfactor = max(fluoBL_correctionfactor, 0.0)
@@ -739,12 +746,14 @@ def correct_biolume_proxies(  # noqa: PLR0913
                 df_p.adinosN[iprofil] = df_p.adinos[iprofil] * fluoBL_correctionfactor
 
                 # preserving adinos+diatoms
-                df_p.diatomsN[iprofil] = \
+                df_p.diatomsN[iprofil] = (
                     df_p.adinos[iprofil] + df_p.diatoms[iprofil] - df_p.adinosN[iprofil]
+                )
 
                 # preserving adinos+hdinos
-                df_p.hdinosN[iprofil] = \
+                df_p.hdinosN[iprofil] = (
                     df_p.adinos[iprofil] + df_p.hdinos[iprofil] - df_p.adinosN[iprofil]
+                )
 
                 self.df_r.biolum_proxy_adinos[iprofil] = df_p.adinosN[iprofil]
                 self.df_r.biolum_proxy_diatoms[iprofil] = df_p.diatomsN[iprofil]

From 71b088c0ff764f907d6cf4710373a03bc975ffa8 Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Tue, 15 Apr 2025 15:19:25 -0700
Subject: [PATCH 4/9] pass fluo from add_biolume_proxies to correction; get
 depth and profile from resampled_nc

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 67 ++++++++++++++++++++++++++++++--------------
 1 file changed, 46 insertions(+), 21 deletions(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index 822145d5..cca6df48 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -610,6 +610,7 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
         self.df_r["biolume_bg_biolume"].attrs["units"] = "photons/liter"
         self.df_r["biolume_bg_biolume"].attrs["comment"] = zero_note
 
+        fluo = None
         nighttime_bl_raw, sunsets, sunrises = self.select_nighttime_bl_raw()
         if nighttime_bl_raw.empty:
             self.logger.info(
@@ -621,7 +622,7 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
                 self.logger.info(
                     "No hs2_fl700 data. Not computing adinos, diatoms, and hdinos",
                 )
-                return
+                return None
             fluo = (
                 self.resampled_nc["hs2_fl700"]
                 .where(
@@ -664,8 +665,11 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
                 f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}"
             )
 
+        return fluo
+
     def correct_biolume_proxies(  # noqa: PLR0913
         self,
+        biolume_fluo: float,  # from add_biolume_proxies
         adinos_threshold: float = 0.1,
         correction_threshold: int = 3,
         fluo_bl_threshold: float = 0.25,  # use 0.45 for pearson corr
@@ -673,17 +677,23 @@ def correct_biolume_proxies(  # noqa: PLR0913
         depth_threshold: float = 2.0,
         minutes_from_surface_threshold: int = 5,
     ) -> None:
-        df_p = self.df_r[
-            [
-                "depth",
-                "biolum_proxy_diatoms",
-                "biolum_proxy_adinos",
-                "biolum_proxy_hdinos",
-                "fluo",
-                "bg_biolum",
-            ]
-        ].copy(deep=True)
-        df_p["fluoBL_corr"] = ("time", np.full_like(self.resampled_nc["profile_number"], np.nan))
+        variables = [
+            "biolume_proxy_diatoms",
+            "biolume_proxy_adinos",
+            "biolume_proxy_hdinos",
+            "biolume_bg_biolume",
+        ]
+        try:
+            df_p = self.df_r[variables].copy(deep=True)
+        except KeyError:
+            # We didn't add biolum proxies this round...
+            return
+
+        df_p["biolume_fluo"] = biolume_fluo
+        df_p["fluoBL_corr"] = ("time", np.full_like(df_p.biolume_fluo, np.nan))
+
+        depth_series = self.resampled_nc["depth"].to_series()
+        df_p["depth"] = depth_series.reindex(df_p.index, method="ffill")
 
         profile_series = self.resampled_nc["profile_number"].to_series()
         df_p["profile_number"] = profile_series.reindex(df_p.index, method="ffill")
@@ -713,7 +723,10 @@ def correct_biolume_proxies(  # noqa: PLR0913
                 continue
             auv_profil = df_p.loc[iprofil]
             if auv_profil.shape[0] > correction_threshold:
-                if np.sum(auv_profil.adinos > adinos_threshold) < correction_threshold:
+                if (
+                    np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold)
+                    < correction_threshold
+                ):
                     # no correction for low fluo & biolum values
                     fluoBL_corr = 1.0
                 else:
@@ -721,16 +734,23 @@ def correct_biolume_proxies(  # noqa: PLR0913
                     # adino values for each profile
                     idepth = (
                         auv_profil.depth
-                        < auv_profil.depth[auv_profil.adinos > adinos_threshold].max()
+                        < auv_profil.depth[auv_profil.biolume_proxy_adinos > adinos_threshold].max()
                     )
+                    auv_profil_idepth = auv_profil[["biolume_fluo", "biolume_bg_biolume"]].loc[
+                        idepth
+                    ]
                     # pandas' corr ignores NaN
-                    auv_profil_idepth = auv_profil[["fluo", "bg_biolum"]].loc[idepth]
-                    fluoBL_corr = auv_profil_idepth.fluo.corr(
-                        auv_profil_idepth.bg_biolum, method=corr_type
+                    fluoBL_corr = auv_profil_idepth.biolume_fluo.corr(
+                        auv_profil_idepth.biolume_bg_biolume, method=corr_type
                     )
 
                 # save correlation
                 df_p.fluoBL_corr[iprofil] = fluoBL_corr
+                self.logger.info(
+                    "Correcting proxies for profile=%d using fluoBL_corr=%.4f",
+                    iprofil_,
+                    fluoBL_corr,
+                )
 
                 # scale between 0 and 1 first
                 fluoBL_correctionfactor = (fluoBL_corr + 1.0) / 2.0
@@ -743,16 +763,20 @@ def correct_biolume_proxies(  # noqa: PLR0913
                 # can happen if fluo_bl_threshold is negative
                 fluoBL_correctionfactor = max(fluoBL_correctionfactor, 0.0)
 
-                df_p.adinosN[iprofil] = df_p.adinos[iprofil] * fluoBL_correctionfactor
+                df_p.adinosN[iprofil] = df_p.biolume_proxy_adinos[iprofil] * fluoBL_correctionfactor
 
                 # preserving adinos+diatoms
                 df_p.diatomsN[iprofil] = (
-                    df_p.adinos[iprofil] + df_p.diatoms[iprofil] - df_p.adinosN[iprofil]
+                    df_p.biolume_proxy_adinos[iprofil]
+                    + df_p.biolume_proxy_diatoms[iprofil]
+                    - df_p.adinosN[iprofil]
                 )
 
                 # preserving adinos+hdinos
                 df_p.hdinosN[iprofil] = (
-                    df_p.adinos[iprofil] + df_p.hdinos[iprofil] - df_p.adinosN[iprofil]
+                    df_p.biolume_proxy_adinos[iprofil]
+                    + df_p.biolume_proxy_hdinos[iprofil]
+                    - df_p.adinosN[iprofil]
                 )
 
                 self.df_r.biolum_proxy_adinos[iprofil] = df_p.adinosN[iprofil]
@@ -773,7 +797,8 @@ def resample_variable(  # noqa: PLR0913
         if instr == "biolume" and variable == "biolume_raw":
             # Only biolume_avg_biolume and biolume_flow treated like other data
             # All other biolume variables in self.df_r[] are computed from biolume_raw
-            self.add_biolume_proxies(freq)
+            biolume_fluo = self.add_biolume_proxies(freq)
+            self.correct_biolume_proxies(biolume_fluo)
         else:
             self.df_o[variable] = self.ds[variable].to_pandas()
             self.df_o[f"{variable}_mf"] = (

From 3216e2db9bdc1386d4935b2586cf14287ca5ce3e Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Wed, 7 May 2025 16:22:10 -0700
Subject: [PATCH 5/9] tested proxy corrections with sample mission

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index cca6df48..dee68c6f 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -672,7 +672,7 @@ def correct_biolume_proxies(  # noqa: PLR0913
         biolume_fluo: float,  # from add_biolume_proxies
         adinos_threshold: float = 0.1,
         correction_threshold: int = 3,
-        fluo_bl_threshold: float = 0.25,  # use 0.45 for pearson corr
+        fluo_bl_threshold: float = 0.35,  # use 0.45 for pearson corr
         corr_type: str = "spearman",  # or pearson
         depth_threshold: float = 2.0,
         minutes_from_surface_threshold: int = 5,
@@ -690,23 +690,32 @@ def correct_biolume_proxies(  # noqa: PLR0913
             return
 
         df_p["biolume_fluo"] = biolume_fluo
-        df_p["fluoBL_corr"] = ("time", np.full_like(df_p.biolume_fluo, np.nan))
+        df_p["fluoBL_corr"] = np.full_like(df_p.biolume_fluo, np.nan)
 
         depth_series = self.resampled_nc["depth"].to_series()
         df_p["depth"] = depth_series.reindex(df_p.index, method="ffill")
 
         profile_series = self.resampled_nc["profile_number"].to_series()
         df_p["profile_number"] = profile_series.reindex(df_p.index, method="ffill")
+        df_p.biolume_bg_biolume.dropna(inplace=True)
         # make unique profiles across all surveys
-        profil = np.cumsum(np.abs(np.diff(df_p["profile_number"], prepend=0)))
+        df_p.dropna(inplace=True, subset=[
+            "depth",
+            "profile_number",
+            "biolume_proxy_diatoms",
+            "biolume_proxy_adinos",
+            "biolume_proxy_hdinos",
+            "biolume_fluo"
+        ])
+        profil = np.cumsum(np.abs(np.diff(df_p.profile_number, prepend=0)))
 
         # new proxies are the "N" fields
         for new, old in zip(
             ["diatomsN", "adinosN", "hdinosN"],
-            ["biolum_proxy_diatoms", "biolum_proxy_adinos", "biolum_proxy_hdinos"],
+            ["biolume_proxy_diatoms", "biolume_proxy_adinos", "biolume_proxy_hdinos"],
             strict=False,
         ):
-            df_p[new] = ("time", np.full_like(df_p[old], np.nan))
+            df_p[new] = np.full_like(df_p[old], np.nan)
 
         # compute correlation per profil and then correct proxies
         dt_5mins = np.timedelta64(timedelta(minutes=minutes_from_surface_threshold))
@@ -779,9 +788,10 @@ def correct_biolume_proxies(  # noqa: PLR0913
                     - df_p.adinosN[iprofil]
                 )
 
-                self.df_r.biolum_proxy_adinos[iprofil] = df_p.adinosN[iprofil]
-                self.df_r.biolum_proxy_diatoms[iprofil] = df_p.diatomsN[iprofil]
-                self.df_r.biolum_proxy_hdinos[iprofil] = df_p.hdinosN[iprofil]
+                target_indices = df_p.index[iprofil]
+                self.df_r.loc[target_indices, 'biolume_proxy_adinos'] = df_p.adinosN.loc[iprofil]
+                self.df_r.loc[target_indices, 'biolume_proxy_diatoms'] = df_p.diatomsN.loc[iprofil]
+                self.df_r.loc[target_indices, 'biolume_proxy_hdinos'] = df_p.hdinosN.loc[iprofil]
 
     def resample_variable(  # noqa: PLR0913
         self,

From 282132bfa1807f2cbc208ccd7d43920964269011 Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Wed, 21 May 2025 14:36:48 -0700
Subject: [PATCH 6/9] proxies corrections edge cases; make linters happy

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 178 +++++++++++++++++++++++++++++++++----------
 1 file changed, 137 insertions(+), 41 deletions(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index dee68c6f..b7c79a98 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -15,7 +15,7 @@
 import sys
 import time
 from collections import defaultdict
-from datetime import datetime, timedelta, timezone
+from datetime import UTC, datetime, timedelta
 from pathlib import Path
 from socket import gethostname
 
@@ -51,7 +51,7 @@ class Resampler:
     def __init__(self) -> None:
         plt.rcParams["figure.figsize"] = (15, 5)
         self.resampled_nc = xr.Dataset()
-        iso_now = datetime.now(tz=timezone.utc).isoformat().split(".")[0] + "Z"
+        iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z"
         # Common static attributes for all auv platforms
         self.metadata = {}
         self.metadata["netcdf_version"] = "4"
@@ -75,7 +75,7 @@ def _build_global_metadata(self) -> None:
                 e,
             )
             gitcommit = "<failed to get git commit>"
-        iso_now = datetime.now(tz=timezone.utc).isoformat().split(".")[0] + "Z"
+        iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z"
         # Common dynamic attributes for all auv platforms
         self.metadata["time_coverage_start"] = str(min(self.resampled_nc.time.values))
         self.metadata["time_coverage_end"] = str(max(self.resampled_nc.time.values))
@@ -373,7 +373,7 @@ def select_nighttime_bl_raw(
                 get_altitude(
                     lat,
                     lon,
-                    datetime.fromtimestamp(ts.astype(int) / 1.0e9, tz=timezone.utc),
+                    datetime.fromtimestamp(ts.astype(int) / 1.0e9, tz=UTC),
                 ),
             )
 
@@ -461,7 +461,7 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
         flash_threshold: float = FLASH_THRESHOLD,
         proxy_ratio_adinos: float = 3.9811e13,  # 4-Oct-2010 to 2-Dec-2020 value
         proxy_cal_factor=0.00470,  # Same as used in 5.2-mpm-bg_biolume-PiO-paper.ipynb
-    ) -> None:
+    ) -> tuple[pd.Series, list[datetime], list[datetime]]:
         # Add variables via the calculations according to Appendix B in
         # "Using fluorescence and bioluminescence sensors to characterize
         # auto- and heterotrophic plankton communities" by Messie et al."
@@ -622,7 +622,7 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
                 self.logger.info(
                     "No hs2_fl700 data. Not computing adinos, diatoms, and hdinos",
                 )
-                return None
+                return fluo, sunsets, sunrises
             fluo = (
                 self.resampled_nc["hs2_fl700"]
                 .where(
@@ -665,15 +665,17 @@ def add_biolume_proxies(  # noqa: PLR0913, PLR0915
                 f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}"
             )
 
-        return fluo
+        return fluo, sunsets, sunrises
 
-    def correct_biolume_proxies(  # noqa: PLR0913
+    def correct_biolume_proxies(  # noqa: PLR0913, PLR0915
         self,
-        biolume_fluo: float,  # from add_biolume_proxies
+        biolume_fluo: pd.Series,  # from add_biolume_proxies
+        biolume_sunsets: list[datetime],  # from add_biolume_proxies
+        biolume_sunrises: list[datetime],  # from add_biolume_proxies
         adinos_threshold: float = 0.1,
         correction_threshold: int = 3,
         fluo_bl_threshold: float = 0.35,  # use 0.45 for pearson corr
-        corr_type: str = "spearman",  # or pearson
+        corr_type: str = "pearson",  # "spearman" or "pearson"
         depth_threshold: float = 2.0,
         minutes_from_surface_threshold: int = 5,
     ) -> None:
@@ -693,21 +695,32 @@ def correct_biolume_proxies(  # noqa: PLR0913
         df_p["fluoBL_corr"] = np.full_like(df_p.biolume_fluo, np.nan)
 
         depth_series = self.resampled_nc["depth"].to_series()
-        df_p["depth"] = depth_series.reindex(df_p.index, method="ffill")
+        # df_p["depth"] = depth_series.reindex(df_p.index, method="ffill")
+        df_p = pd.merge_asof(
+            df_p, depth_series.to_frame(), left_index=True, right_index=True, direction="nearest"
+        )
+
+        self.logger.info(
+            "correct proxies: df_p depth count=%d nans=%d, resampled_nc depth count=%d nans=%d",
+            df_p.depth.count(),
+            df_p.depth.isna().sum(),
+            self.resampled_nc.depth.count(),
+            self.resampled_nc.depth.isnull().sum(),  # noqa: PD003
+        )
 
         profile_series = self.resampled_nc["profile_number"].to_series()
-        df_p["profile_number"] = profile_series.reindex(df_p.index, method="ffill")
-        df_p.biolume_bg_biolume.dropna(inplace=True)
-        # make unique profiles across all surveys
-        df_p.dropna(inplace=True, subset=[
-            "depth",
-            "profile_number",
-            "biolume_proxy_diatoms",
-            "biolume_proxy_adinos",
-            "biolume_proxy_hdinos",
-            "biolume_fluo"
-        ])
-        profil = np.cumsum(np.abs(np.diff(df_p.profile_number, prepend=0)))
+        # df_p["profile_number"] = profile_series.reindex(df_p.index, method="ffill")
+        df_p = pd.merge_asof(
+            df_p, profile_series.to_frame(), left_index=True, right_index=True, direction="nearest"
+        )
+
+        self.logger.info(
+            "correct proxies: df_p max profile=%d (nans=%d), resampled_nc max profile=%d (nans=%d)",
+            df_p.profile_number.max(),
+            df_p.profile_number.isna().sum(),
+            self.resampled_nc.profile_number.max(),
+            self.resampled_nc.profile_number.isnull().sum(),  # noqa: PD003
+        )
 
         # new proxies are the "N" fields
         for new, old in zip(
@@ -717,10 +730,52 @@ def correct_biolume_proxies(  # noqa: PLR0913
         ):
             df_p[new] = np.full_like(df_p[old], np.nan)
 
+        def _interval_contains_sunevent(
+            start: pd.Timestamp, end: pd.Timestamp, events: pd.DatetimeIndex
+        ) -> bool:
+            mask = (events >= start) & (events <= end)
+            return mask.any()
+
+        biolume_sunsets = pd.DatetimeIndex(biolume_sunsets).sort_values()
+        biolume_sunrises = pd.DatetimeIndex(biolume_sunrises).sort_values()
+        profile_intervals = (
+            df_p.groupby("profile_number")
+            .apply(lambda g: (g.index.min(), g.index.max()), include_groups=False)
+            .rename("interval")
+            .apply(pd.Series)
+            .rename(columns={0: "start", 1: "end"})
+        )
+        profile_intervals["has_sunset"] = profile_intervals.apply(
+            lambda row: _interval_contains_sunevent(row["start"], row["end"], biolume_sunsets),
+            axis=1,
+        )
+        profile_intervals["has_sunrise"] = profile_intervals.apply(
+            lambda row: _interval_contains_sunevent(row["start"], row["end"], biolume_sunrises),
+            axis=1,
+        )
+        profile_intervals["has_sunevent"] = (
+            profile_intervals["has_sunrise"] | profile_intervals["has_sunset"]
+        )
+        df_p["has_sunset"] = df_p["profile_number"].map(profile_intervals["has_sunset"])
+        df_p["has_sunrise"] = df_p["profile_number"].map(profile_intervals["has_sunrise"])
+        df_p["has_sunevent"] = df_p["profile_number"].map(profile_intervals["has_sunevent"])
+
         # compute correlation per profil and then correct proxies
+        profil = df_p.profile_number
         dt_5mins = np.timedelta64(timedelta(minutes=minutes_from_surface_threshold))
-        for iprofil_ in range(1, int(np.max(profil))):
+        for iprofil_ in range(1, int(np.max(profil)) + 1):
             iprofil = profil == iprofil_
+            has_sunevent = df_p.loc[iprofil, "has_sunevent"].any()
+            if has_sunevent:  # set proxies for this profile to NaN
+                self.logger.info(
+                    "Processing profile=%d for proxy correction: found sun event -- set NaN",
+                    iprofil_,
+                )
+                target_indices = df_p.index[iprofil]
+                self.df_r.loc[target_indices, "biolume_proxy_adinos"] = np.nan
+                self.df_r.loc[target_indices, "biolume_proxy_diatoms"] = np.nan
+                self.df_r.loc[target_indices, "biolume_proxy_hdinos"] = np.nan
+                continue
             # excludes surface, must be within 5 min of it
             ideep = iprofil & (df_p.depth > depth_threshold)
             itime = (df_p.index > (df_p.index[ideep].min() - dt_5mins)) & (
@@ -731,13 +786,35 @@ def correct_biolume_proxies(  # noqa: PLR0913
                 # print(f'no corrections possible for {iprofil_=}')
                 continue
             auv_profil = df_p.loc[iprofil]
+            self.logger.info(
+                "Processing profile=%d for proxy correction: total_points=%d > thresh=%d ?",
+                iprofil_,
+                auv_profil.shape[0],
+                correction_threshold,
+            )
             if auv_profil.shape[0] > correction_threshold:
                 if (
                     np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold)
                     < correction_threshold
                 ):
+                    if auv_profil.biolume_proxy_adinos.count() == 0:  # all proxies are NaN so skip
+                        self.logger.info(
+                            "Correcting proxies: valid adinos=%d < thresh=%d -- all NaN so skip",
+                            np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold),
+                            correction_threshold,
+                        )
+                        continue
                     # no correction for low fluo & biolum values
                     fluoBL_corr = 1.0
+                    self.logger.info(
+                        "Correcting proxies: valid adinos=%d < thresh=%d"
+                        " -- using fluoBL_corr=%.4f, total_size_adinos=%d, nans=%d",
+                        np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold),
+                        correction_threshold,
+                        fluoBL_corr,
+                        auv_profil.biolume_proxy_adinos.shape[0],
+                        auv_profil.biolume_proxy_adinos.isna().sum(),
+                    )
                 else:
                     # correlation between fluo and bg_biolum computed on high
                     # adino values for each profile
@@ -745,21 +822,33 @@ def correct_biolume_proxies(  # noqa: PLR0913
                         auv_profil.depth
                         < auv_profil.depth[auv_profil.biolume_proxy_adinos > adinos_threshold].max()
                     )
-                    auv_profil_idepth = auv_profil[["biolume_fluo", "biolume_bg_biolume"]].loc[
-                        idepth
-                    ]
+                    auv_profil_idepth = auv_profil[
+                        ["biolume_fluo", "biolume_bg_biolume", "depth"]
+                    ].loc[idepth]
                     # pandas' corr ignores NaN
                     fluoBL_corr = auv_profil_idepth.biolume_fluo.corr(
                         auv_profil_idepth.biolume_bg_biolume, method=corr_type
                     )
+                    self.logger.info(
+                        "Correcting proxies: valid adinos=%d > thresh=%d"
+                        " -- using fluoBL_corr=%.4f, total_size_idepth=%d, nans=%d,"
+                        " min_depth=%.4f, max_depth=%.4f",
+                        np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold),
+                        correction_threshold,
+                        fluoBL_corr,
+                        auv_profil_idepth.shape[0],
+                        auv_profil.biolume_proxy_adinos.isna().sum(),
+                        auv_profil_idepth.depth.min(),
+                        auv_profil_idepth.depth.max(),
+                    )
 
                 # save correlation
-                df_p.fluoBL_corr[iprofil] = fluoBL_corr
-                self.logger.info(
-                    "Correcting proxies for profile=%d using fluoBL_corr=%.4f",
-                    iprofil_,
-                    fluoBL_corr,
-                )
+                df_p.loc[iprofil, "fluoBL_corr"] = fluoBL_corr
+                # self.logger.info(
+                #    "Correcting proxies for profile=%d using fluoBL_corr=%.4f",
+                #    iprofil_,
+                #    fluoBL_corr,
+                # )
 
                 # scale between 0 and 1 first
                 fluoBL_correctionfactor = (fluoBL_corr + 1.0) / 2.0
@@ -772,26 +861,33 @@ def correct_biolume_proxies(  # noqa: PLR0913
                 # can happen if fluo_bl_threshold is negative
                 fluoBL_correctionfactor = max(fluoBL_correctionfactor, 0.0)
 
-                df_p.adinosN[iprofil] = df_p.biolume_proxy_adinos[iprofil] * fluoBL_correctionfactor
+                df_p.loc[iprofil, "adinosN"] = (
+                    df_p.biolume_proxy_adinos[iprofil] * fluoBL_correctionfactor
+                )
 
                 # preserving adinos+diatoms
-                df_p.diatomsN[iprofil] = (
+                df_p.loc[iprofil, "diatomsN"] = (
                     df_p.biolume_proxy_adinos[iprofil]
                     + df_p.biolume_proxy_diatoms[iprofil]
                     - df_p.adinosN[iprofil]
                 )
 
                 # preserving adinos+hdinos
-                df_p.hdinosN[iprofil] = (
+                df_p.loc[iprofil, "hdinosN"] = (
                     df_p.biolume_proxy_adinos[iprofil]
                     + df_p.biolume_proxy_hdinos[iprofil]
                     - df_p.adinosN[iprofil]
                 )
 
                 target_indices = df_p.index[iprofil]
-                self.df_r.loc[target_indices, 'biolume_proxy_adinos'] = df_p.adinosN.loc[iprofil]
-                self.df_r.loc[target_indices, 'biolume_proxy_diatoms'] = df_p.diatomsN.loc[iprofil]
-                self.df_r.loc[target_indices, 'biolume_proxy_hdinos'] = df_p.hdinosN.loc[iprofil]
+                self.df_r.loc[target_indices, "biolume_proxy_adinos"] = df_p.adinosN.loc[iprofil]
+                self.df_r.loc[target_indices, "biolume_proxy_diatoms"] = df_p.diatomsN.loc[iprofil]
+                self.df_r.loc[target_indices, "biolume_proxy_hdinos"] = df_p.hdinosN.loc[iprofil]
+            else:
+                self.logger.info(
+                    "profile=%d skipped for proxy correction",
+                    iprofil_,
+                )
 
     def resample_variable(  # noqa: PLR0913
         self,
@@ -807,8 +903,8 @@ def resample_variable(  # noqa: PLR0913
         if instr == "biolume" and variable == "biolume_raw":
             # Only biolume_avg_biolume and biolume_flow treated like other data
             # All other biolume variables in self.df_r[] are computed from biolume_raw
-            biolume_fluo = self.add_biolume_proxies(freq)
-            self.correct_biolume_proxies(biolume_fluo)
+            biolume_fluo, biolume_sunsets, biolume_sunrises = self.add_biolume_proxies(freq)
+            self.correct_biolume_proxies(biolume_fluo, biolume_sunsets, biolume_sunrises)
         else:
             self.df_o[variable] = self.ds[variable].to_pandas()
             self.df_o[f"{variable}_mf"] = (

From a8179f27fda99219dc8eb3512938b7c5243aac5f Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Wed, 21 May 2025 14:42:03 -0700
Subject: [PATCH 7/9] put UTC back to timezone.utc for python3.10

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index b7c79a98..a81131e5 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -15,7 +15,7 @@
 import sys
 import time
 from collections import defaultdict
-from datetime import UTC, datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from socket import gethostname
 
@@ -51,7 +51,7 @@ class Resampler:
     def __init__(self) -> None:
         plt.rcParams["figure.figsize"] = (15, 5)
         self.resampled_nc = xr.Dataset()
-        iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z"
+        iso_now = datetime.now(tz=timezone.utc).isoformat().split(".")[0] + "Z"
         # Common static attributes for all auv platforms
         self.metadata = {}
         self.metadata["netcdf_version"] = "4"
@@ -75,7 +75,7 @@ def _build_global_metadata(self) -> None:
                 e,
             )
             gitcommit = "<failed to get git commit>"
-        iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z"
+        iso_now = datetime.now(tz=timezone.utc).isoformat().split(".")[0] + "Z"
         # Common dynamic attributes for all auv platforms
         self.metadata["time_coverage_start"] = str(min(self.resampled_nc.time.values))
         self.metadata["time_coverage_end"] = str(max(self.resampled_nc.time.values))
@@ -373,7 +373,7 @@ def select_nighttime_bl_raw(
                 get_altitude(
                     lat,
                     lon,
-                    datetime.fromtimestamp(ts.astype(int) / 1.0e9, tz=UTC),
+                    datetime.fromtimestamp(ts.astype(int) / 1.0e9, tz=timezone.utc),
                 ),
             )
 

From 2d54f722e12174c7094841516dd6aee1c763c0bd Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Fri, 23 May 2025 15:08:52 -0700
Subject: [PATCH 8/9] remove include_groups field in pandas lambda for now

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index a81131e5..7c407db4 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -740,7 +740,7 @@ def _interval_contains_sunevent(
         biolume_sunrises = pd.DatetimeIndex(biolume_sunrises).sort_values()
         profile_intervals = (
             df_p.groupby("profile_number")
-            .apply(lambda g: (g.index.min(), g.index.max()), include_groups=False)
+            .apply(lambda g: (g.index.min(), g.index.max()))  # pandas2.2.3: include_groups=False
             .rename("interval")
             .apply(pd.Series)
             .rename(columns={0: "start", 1: "end"})

From 8a998c8a14fca45b804b59c78805a3c30f66f08e Mon Sep 17 00:00:00 2001
From: Michael Anderson <anderson@mbari.org>
Date: Tue, 27 May 2025 10:56:09 -0700
Subject: [PATCH 9/9] no reason to exclude that last depth point since that's
 still above adinos_threshold

Signed-off-by: Michael Anderson <anderson@mbari.org>
---
 src/data/resample.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/data/resample.py b/src/data/resample.py
index 7c407db4..64b7b73e 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -820,7 +820,7 @@ def _interval_contains_sunevent(
                     # adino values for each profile
                     idepth = (
                         auv_profil.depth
-                        < auv_profil.depth[auv_profil.biolume_proxy_adinos > adinos_threshold].max()
+                        <= auv_profil.depth[auv_profil.biolume_proxy_adinos > adinos_threshold].max()
                     )
                     auv_profil_idepth = auv_profil[
                         ["biolume_fluo", "biolume_bg_biolume", "depth"]