diff --git a/.vscode/launch.json b/.vscode/launch.json
index 656c8dc..0e7991a 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -140,7 +140,9 @@
// No GPS data for a log_file that has an ESP Sample
//"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250920T070029/202509200700_202509201900.nc4", "--plot"]
// Depth values get offset in time, See GitHub Issue 115.
- "args": ["-v", "2", "--log_file", "pontus/missionlogs/2024/20240715_20240725/20240723T023501/202407230235_202407232319.nc4", "--plot"]
+ //"args": ["-v", "2", "--log_file", "pontus/missionlogs/2024/20240715_20240725/20240723T023501/202407230235_202407232319.nc4", "--plot"]
+ // Debug missing biolume data
+ "args": ["-v", "2", "--log_file", "pontus/missionlogs/2025/20250909_20250912/20250911T051742/202509110517_202509112217.nc4", "--plot"]
},
@@ -211,7 +213,8 @@
//"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250414_20250418/20250414T205440/202504142054_202504150400.nc4"],
//"args": ["--auv_name", "dorado", "--mission", "2022.201.00", "-v", "1"],
//"args": ["--auv_name", "dorado", "--mission", "2006.338.11", "-v", "1"],
- "args": ["--auv_name", "dorado", "--mission", "2017.347.00", "-v", "1"],
+ //"args": ["--auv_name", "dorado", "--mission", "2017.347.00", "-v", "1"],
+ "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250909_20250912/20250911T051742/202509110517_202509112217.nc4"],
},
{
"name": "5.0 - archive.py",
diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md
index 22a2e9e..d8610c5 100644
--- a/LRAUV_WORKFLOW.md
+++ b/LRAUV_WORKFLOW.md
@@ -10,26 +10,33 @@ flowchart LR
C[combine.py]
D[(_combined.nc4)]
E[align.py]
- F[(_align.nc4)]
- G[resample.py]
- H[(_nS.nc)]
- I[archive.py]
- J[(archived files in
/mbari/LRAUV/)]
Z --> A
A --> B
B --> C
C --> D
D --> E
+
+ style Z fill:#e1f5ff
+ style B fill:#e1f5ff
+ style D fill:#e1f5ff
+```
+
+```mermaid
+flowchart LR
+ E[align.py]
+ F[(_align.nc4)]
+ G[resample.py]
+ H[(_nS.nc)]
+ I[archive.py]
+ J[(archived files in
/mbari/LRAUV/)]
+
E --> F
F --> G
G --> H
H --> I
I --> J
- style Z fill:#e1f5ff
- style B fill:#e1f5ff
- style D fill:#e1f5ff
style F fill:#e1f5ff
style H fill:#e1f5ff
style J fill:#e1f5ff
diff --git a/src/data/combine.py b/src/data/combine.py
index 9ad4c6c..610cd7b 100755
--- a/src/data/combine.py
+++ b/src/data/combine.py
@@ -52,11 +52,14 @@
import numpy as np
import pandas as pd
import xarray as xr
-from utils import monotonic_increasing_time_indices, nudge_positions
+from utils import (
+ get_deployment_name,
+ monotonic_increasing_time_indices,
+ nudge_positions,
+)
from common_args import get_standard_lrauv_parser
from logs2netcdfs import AUV_NetCDF, TIME, TIME60HZ
from nc42netcdfs import BASE_LRAUV_PATH, GROUP
-from utils import get_deployment_name
AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay
@@ -723,7 +726,60 @@ def _align_ubat_time_coordinates(self, ubat_2d, calib_coeff, time_dim, calib_tim
return ubat_2d_aligned, calib_coeff_aligned
- def _expand_ubat_to_60hz(self) -> None:
+ def _handle_hv_step_calibration_coefficient(
+ self,
+ calib_coeff: xr.DataArray,
+ mf_width: int,
+ ) -> xr.DataArray:
+ """Handle special treatment for hv_step_calibration_coefficient variable.
+
+    Apply a centered rolling median filter, then fill remaining NaN values with
+    the mean of the despiked data, or with the constant 14700000.0 if none are valid.
+
+ Args:
+ calib_coeff: The calibration coefficient DataArray to process
+ mf_width: Median filter width for despiking
+
+ Returns:
+        xr.DataArray: The despiked calibration coefficient with all NaNs filled
+ """
+ timevar = calib_coeff.dims[0]
+
+ # Apply median filter for despiking
+ despiked_data = calib_coeff.rolling(**{timevar: mf_width}, center=True).median().to_pandas()
+
+ # Check if there are any non-NaN values in the despiked data
+ valid_data = despiked_data.dropna()
+
+ if len(valid_data) > 0:
+ # Calculate mean of despiked data
+ mean_value = valid_data.mean()
+ self.logger.info(
+ "Filling NaN values in hv_step_calibration_coefficient "
+ "with mean of despiked data: %.2f",
+ mean_value,
+ )
+ # Replace NaNs with the mean
+ filled_data = despiked_data.fillna(mean_value)
+ else:
+ # No valid data at all, use constant value
+ constant_value = 14700000.0
+ self.logger.info(
+ "No valid data for hv_step_calibration_coefficient, "
+ "filling all NaN values with constant: %.1f",
+ constant_value,
+ )
+ filled_data = despiked_data.fillna(constant_value)
+
+ # Convert back to xarray DataArray with original coordinates and attributes
+ return xr.DataArray(
+ filled_data.values,
+ dims=[timevar],
+ coords={timevar: calib_coeff.coords[timevar]},
+ attrs=calib_coeff.attrs,
+ )
+
+ def _expand_ubat_to_60hz(self) -> None: # noqa: PLR0915
"""Expand UBAT digitized_raw_ad_counts 2D array into 60hz time series.
Replaces the 2D array with a 1D 60Hz time series, analogous to how
@@ -804,6 +860,15 @@ def _expand_ubat_to_60hz(self) -> None:
len(ubat_time),
)
+ # Apply special handling for NaN values in calibration coefficient
+ # Use median filter width of 3 (standard value for despiking)
+ mf_width = 3
+ self.logger.info("Applying NaN handling to wetlabsubat_hv_step_calibration_coefficient")
+ calib_coeff = self._handle_hv_step_calibration_coefficient(calib_coeff, mf_width)
+
+ # Save the filled calibration coefficient back to the combined dataset
+ self.combined_nc["wetlabsubat_hv_step_calibration_coefficient"] = calib_coeff
+
# Multiply raw 60 hz values by the calibration coefficient
# Broadcasting: calib_coeff is (m,) and ubat_2d is (m, 60)
# This multiplies each row of ubat_2d by the corresponding coefficient
diff --git a/src/data/resample.py b/src/data/resample.py
index 98296de..0413263 100755
--- a/src/data/resample.py
+++ b/src/data/resample.py
@@ -2275,6 +2275,7 @@ def process_command_line(self):
if resamp.args.log_file:
netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(resamp.args.log_file).parent}")
nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc4")
+ resamp.auv_name = Path(resamp.args.log_file).parts[0]
else:
file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc4"
nc_file = Path(
@@ -2285,7 +2286,7 @@ def process_command_line(self):
file_name,
)
p_start = time.time()
- # Everything that Resampler needs should be in the self described nc_file
+ # Everything that Resampler needs should be in the self-described nc_file
# whether it is Dorado/i2MAP or LRAUV
resamp.resample_align_file(
nc_file,
diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py
index d286ea9..26c2479 100644
--- a/src/data/test_process_i2map.py
+++ b/src/data/test_process_i2map.py
@@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing):
# but it will alert us if a code change unexpectedly changes the file size.
# If code changes are expected to change the file size then we should
# update the expected size here.
- EXPECTED_SIZE_GITHUB = 63131
+ EXPECTED_SIZE_GITHUB = 63132
EXPECTED_SIZE_ACT = 63101
EXPECTED_SIZE_LOCAL = 64637
if str(proc.args.base_path).startswith("/home/runner"):
diff --git a/uv.lock b/uv.lock
index d0144cd..3e4d32d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -192,6 +192,7 @@ dependencies = [
{ name = "jupyter-bokeh" },
{ name = "netcdf4" },
{ name = "numpy" },
+ { name = "pandas" },
{ name = "pooch" },
{ name = "pyarrow" },
{ name = "pygmt" },
@@ -227,6 +228,7 @@ requires-dist = [
{ name = "jupyter-bokeh", specifier = ">=4.0.5" },
{ name = "netcdf4", specifier = ">=1.7.2" },
{ name = "numpy", specifier = ">=2.2.6" },
+ { name = "pandas", specifier = ">=2.2.0" },
{ name = "pooch", specifier = ">=1.8.2" },
{ name = "pyarrow", specifier = ">=20.0.0" },
{ name = "pygmt", specifier = "==0.16" },