Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@
// Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"]
// ValueError: Dimension mismatch: wetlabsubat_digitized_raw_ad_counts_time has 33154 elements but wetlabsubat_hv_step_calibration_coefficient_time has 33155 elements
"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250626T041517/202506260415_202506261400.nc4", "--no_cleanup"]
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250626T041517/202506260415_202506261400.nc4", "--no_cleanup"]
// ValueError: coords is not dict-like, but it has 1 items, which does not match the 2 dimensions of the data
//"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250626T140000/202506261400_202506262031.nc4", "--no_cleanup"]
// Full month of June 2025 for Pontus with WetLabsUBAT Group data
Expand All @@ -393,6 +393,8 @@
//"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250901T000000", "--end", "20251001T000000", "--no_cleanup"]
// No GPS data for a log_file that has an ESP Sample
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250920T070029/202509200700_202509201900.nc4", "--no_cleanup"]
// Has nighttime data for proxys to be computed
"args": ["-v", "1", "--log_file", "pontus/missionlogs/2024/20240725_20240729/20240729T023020/202407290230_202407291556.nc4", "--no_cleanup"]
},

]
Expand Down
93 changes: 55 additions & 38 deletions src/data/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1328,35 +1328,45 @@ def select_nighttime_ubat_raw(

def correct_biolume_proxies( # noqa: C901, PLR0912, PLR0913, PLR0915
self,
biolume_fluo: pd.Series, # from add_biolume_proxies
biolume_sunsets: list[datetime], # from add_biolume_proxies
biolume_sunrises: list[datetime], # from add_biolume_proxies
biolume_fluo: pd.Series, # from add_biolume_proxies or add_wetlabsubat_proxies
biolume_sunsets: list[datetime], # from add_biolume_proxies or add_wetlabsubat_proxies
biolume_sunrises: list[datetime], # from add_biolume_proxies or add_wetlabsubat_proxies
depth_threshold: float,
adinos_threshold: float = 0.1,
correction_threshold: int = 3,
fluo_bl_threshold: float = 0.4,
corr_type: str = "pearson", # "spearman" or "pearson"
minutes_from_surface_threshold: int = 5,
) -> None:
# Determine which instrument prefix to use based on available variables
prefix = None
if "biolume_proxy_diatoms" in self.df_r:
prefix = "biolume"
elif "wetlabsubat_proxy_diatoms" in self.df_r:
prefix = "wetlabsubat"
else:
# No biolume or wetlabsubat proxies to correct
return

variables = [
"biolume_proxy_diatoms",
"biolume_proxy_adinos",
"biolume_proxy_hdinos",
"biolume_bg_biolume",
f"{prefix}_proxy_diatoms",
f"{prefix}_proxy_adinos",
f"{prefix}_proxy_hdinos",
f"{prefix}_bg_biolume",
]
try:
df_p = self.df_r[variables].copy(deep=True)
except KeyError:
# We didn't add biolum proxies this round...
# We didn't add biolume proxies this round...
return

# Save the attrs for later as the correction process will drop them
saved_attrs = defaultdict(list)
for var in variables:
saved_attrs[var] = self.df_r[var].attrs

df_p["biolume_fluo"] = biolume_fluo
df_p["fluoBL_corr"] = np.full_like(df_p.biolume_fluo, np.nan)
df_p[f"{prefix}_fluo"] = biolume_fluo
df_p["fluoBL_corr"] = np.full_like(df_p[f"{prefix}_fluo"], np.nan)

depth_series = self.resampled_nc["depth"].to_series()
# df_p["depth"] = depth_series.reindex(df_p.index, method="ffill")
Expand Down Expand Up @@ -1397,7 +1407,7 @@ def correct_biolume_proxies( # noqa: C901, PLR0912, PLR0913, PLR0915
# new proxies are the "N" fields
for new, old in zip(
["diatomsN", "adinosN", "hdinosN"],
["biolume_proxy_diatoms", "biolume_proxy_adinos", "biolume_proxy_hdinos"],
[f"{prefix}_proxy_diatoms", f"{prefix}_proxy_adinos", f"{prefix}_proxy_hdinos"],
strict=False,
):
df_p[new] = np.full_like(df_p[old], np.nan)
Expand Down Expand Up @@ -1433,9 +1443,9 @@ def _interval_contains_sunevent(
df_p["has_sunevent"] = df_p["profile_number"].map(profile_intervals["has_sunevent"])

# Set all the proxies to nan and then add in the valid values in the loop below
self.df_r["biolume_proxy_adinos"] = np.nan
self.df_r["biolume_proxy_diatoms"] = np.nan
self.df_r["biolume_proxy_hdinos"] = np.nan
self.df_r[f"{prefix}_proxy_adinos"] = np.nan
self.df_r[f"{prefix}_proxy_diatoms"] = np.nan
self.df_r[f"{prefix}_proxy_hdinos"] = np.nan

# compute correlation per profil and then correct proxies
profil = df_p.profile_number
Expand All @@ -1449,9 +1459,9 @@ def _interval_contains_sunevent(
iprofil_,
)
target_indices = df_p.index[iprofil]
self.df_r.loc[target_indices, "biolume_proxy_adinos"] = np.nan
self.df_r.loc[target_indices, "biolume_proxy_diatoms"] = np.nan
self.df_r.loc[target_indices, "biolume_proxy_hdinos"] = np.nan
self.df_r.loc[target_indices, f"{prefix}_proxy_adinos"] = np.nan
self.df_r.loc[target_indices, f"{prefix}_proxy_diatoms"] = np.nan
self.df_r.loc[target_indices, f"{prefix}_proxy_hdinos"] = np.nan
continue
# excludes surface, must be within 5 min of it
ideep = iprofil & (df_p.depth > depth_threshold)
Expand All @@ -1471,13 +1481,14 @@ def _interval_contains_sunevent(
)
if auv_profil.shape[0] > correction_threshold:
if (
np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold)
np.sum(auv_profil[f"{prefix}_proxy_adinos"] > adinos_threshold)
< correction_threshold
):
if auv_profil.biolume_proxy_adinos.count() == 0: # all proxies are NaN so skip
# all proxies are NaN so skip
if auv_profil[f"{prefix}_proxy_adinos"].count() == 0:
self.logger.info(
"Correcting proxies: valid adinos=%d < thresh=%d -- all NaN so skip",
np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold),
np.sum(auv_profil[f"{prefix}_proxy_adinos"] > adinos_threshold),
correction_threshold,
)
continue
Expand All @@ -1486,37 +1497,37 @@ def _interval_contains_sunevent(
self.logger.info(
"Correcting proxies: valid adinos=%d < thresh=%d"
" -- using fluoBL_corr=%.4f, total_size_adinos=%d, nans=%d",
np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold),
np.sum(auv_profil[f"{prefix}_proxy_adinos"] > adinos_threshold),
correction_threshold,
fluoBL_corr,
auv_profil.biolume_proxy_adinos.shape[0],
auv_profil.biolume_proxy_adinos.isna().sum(),
auv_profil[f"{prefix}_proxy_adinos"].shape[0],
auv_profil[f"{prefix}_proxy_adinos"].isna().sum(),
)
else:
# correlation between fluo and bg_biolum computed on high
# adino values for each profile
idepth = (
auv_profil.depth
<= auv_profil.depth[
auv_profil.biolume_proxy_adinos > adinos_threshold
auv_profil[f"{prefix}_proxy_adinos"] > adinos_threshold
].max()
)
auv_profil_idepth = auv_profil[
["biolume_fluo", "biolume_bg_biolume", "depth"]
[f"{prefix}_fluo", f"{prefix}_bg_biolume", "depth"]
].loc[idepth]
# pandas' corr ignores NaN
fluoBL_corr = auv_profil_idepth.biolume_fluo.corr(
auv_profil_idepth.biolume_bg_biolume, method=corr_type
fluoBL_corr = auv_profil_idepth[f"{prefix}_fluo"].corr(
auv_profil_idepth[f"{prefix}_bg_biolume"], method=corr_type
)
self.logger.info(
"Correcting proxies: valid adinos=%d > thresh=%d"
" -- using fluoBL_corr=%.4f, total_size_idepth=%d, nans=%d,"
" min_depth=%.4f, max_depth=%.4f",
np.sum(auv_profil.biolume_proxy_adinos > adinos_threshold),
np.sum(auv_profil[f"{prefix}_proxy_adinos"] > adinos_threshold),
correction_threshold,
fluoBL_corr,
auv_profil_idepth.shape[0],
auv_profil.biolume_proxy_adinos.isna().sum(),
auv_profil[f"{prefix}_proxy_adinos"].isna().sum(),
auv_profil_idepth.depth.min(),
auv_profil_idepth.depth.max(),
)
Expand All @@ -1541,27 +1552,29 @@ def _interval_contains_sunevent(
fluoBL_correctionfactor = max(fluoBL_correctionfactor, 0.0)

df_p.loc[iprofil, "adinosN"] = (
df_p.biolume_proxy_adinos[iprofil] * fluoBL_correctionfactor
df_p[f"{prefix}_proxy_adinos"][iprofil] * fluoBL_correctionfactor
)

# preserving adinos+diatoms
df_p.loc[iprofil, "diatomsN"] = (
df_p.biolume_proxy_adinos[iprofil]
+ df_p.biolume_proxy_diatoms[iprofil]
df_p[f"{prefix}_proxy_adinos"][iprofil]
+ df_p[f"{prefix}_proxy_diatoms"][iprofil]
- df_p.adinosN[iprofil]
)

# preserving adinos+hdinos
df_p.loc[iprofil, "hdinosN"] = (
df_p.biolume_proxy_adinos[iprofil]
+ df_p.biolume_proxy_hdinos[iprofil]
df_p[f"{prefix}_proxy_adinos"][iprofil]
+ df_p[f"{prefix}_proxy_hdinos"][iprofil]
- df_p.adinosN[iprofil]
)

target_indices = df_p.index[iprofil]
self.df_r.loc[target_indices, "biolume_proxy_adinos"] = df_p.adinosN.loc[iprofil]
self.df_r.loc[target_indices, "biolume_proxy_diatoms"] = df_p.diatomsN.loc[iprofil]
self.df_r.loc[target_indices, "biolume_proxy_hdinos"] = df_p.hdinosN.loc[iprofil]
self.df_r.loc[target_indices, f"{prefix}_proxy_adinos"] = df_p.adinosN.loc[iprofil]
self.df_r.loc[target_indices, f"{prefix}_proxy_diatoms"] = df_p.diatomsN.loc[
iprofil
]
self.df_r.loc[target_indices, f"{prefix}_proxy_hdinos"] = df_p.hdinosN.loc[iprofil]
else:
self.logger.info(
"profile=%d skipped for proxy correction",
Expand All @@ -1571,7 +1584,11 @@ def _interval_contains_sunevent(
# Also add the fluo_bl_threshold value to the comment attribute
for var in saved_attrs:
self.df_r[var].attrs = saved_attrs[var]
if var in ["biolume_proxy_diatoms", "biolume_proxy_adinos", "biolume_proxy_hdinos"]:
if var in [
f"{prefix}_proxy_diatoms",
f"{prefix}_proxy_adinos",
f"{prefix}_proxy_hdinos",
]:
self.df_r[var].attrs["comment"] += (
f"; corrected with fluo_bl_threshold={fluo_bl_threshold}"
)
Expand Down