diff --git a/.vscode/launch.json b/.vscode/launch.json index 656c8dc..0e7991a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -140,7 +140,9 @@ // No GPS data for a log_file that has an ESP Sample //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250920T070029/202509200700_202509201900.nc4", "--plot"] // Depth values get offset in time, See GitHub Issue 115. - "args": ["-v", "2", "--log_file", "pontus/missionlogs/2024/20240715_20240725/20240723T023501/202407230235_202407232319.nc4", "--plot"] + //"args": ["-v", "2", "--log_file", "pontus/missionlogs/2024/20240715_20240725/20240723T023501/202407230235_202407232319.nc4", "--plot"] + // Debug missing biolume data + "args": ["-v", "2", "--log_file", "pontus/missionlogs/2025/20250909_20250912/20250911T051742/202509110517_202509112217.nc4", "--plot"] }, @@ -211,7 +213,8 @@ //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250414_20250418/20250414T205440/202504142054_202504150400.nc4"], //"args": ["--auv_name", "dorado", "--mission", "2022.201.00", "-v", "1"], //"args": ["--auv_name", "dorado", "--mission", "2006.338.11", "-v", "1"], - "args": ["--auv_name", "dorado", "--mission", "2017.347.00", "-v", "1"], + //"args": ["--auv_name", "dorado", "--mission", "2017.347.00", "-v", "1"], + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250909_20250912/20250911T051742/202509110517_202509112217.nc4"], }, { "name": "5.0 - archive.py", diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md index 22a2e9e..d8610c5 100644 --- a/LRAUV_WORKFLOW.md +++ b/LRAUV_WORKFLOW.md @@ -10,26 +10,33 @@ flowchart LR C[combine.py] D[(_combined.nc4)] E[align.py] - F[(_align.nc4)] - G[resample.py] - H[(_nS.nc)] - I[archive.py] - J[(archived files in
/mbari/LRAUV/)] Z --> A A --> B B --> C C --> D D --> E + + style Z fill:#e1f5ff + style B fill:#e1f5ff + style D fill:#e1f5ff +``` + +```mermaid +flowchart LR + E[align.py] + F[(_align.nc4)] + G[resample.py] + H[(_nS.nc)] + I[archive.py] + J[(archived files in
def _handle_hv_step_calibration_coefficient(
    self,
    calib_coeff: xr.DataArray,
    mf_width: int,
    fallback_value: float = 14700000.0,
) -> xr.DataArray:
    """Despike hv_step_calibration_coefficient and fill its missing samples.

    Valid samples are replaced by a centered rolling median (the despiked
    value). Samples that were NaN in the input are replaced by the mean of
    the despiked valid samples. Only when the input holds no valid sample
    at all is ``fallback_value`` used instead.

    Args:
        calib_coeff: 1-D calibration coefficient DataArray; its first (only)
            dimension is treated as the time dimension.
        mf_width: Median filter window width, in samples, used for despiking.
        fallback_value: Value written to every sample when ``calib_coeff``
            contains no valid data.

    Returns:
        xr.DataArray: New array with the same time coordinate and attributes
        as ``calib_coeff`` and no NaN values.
    """
    timevar = calib_coeff.dims[0]
    was_valid = calib_coeff.notnull()

    # min_periods=1 lets the median use whatever valid samples fall inside the
    # window. With xarray's default (min_periods == window size) the first and
    # last samples, and every valid sample adjacent to a NaN, come back as NaN
    # and would then be overwritten with the global mean; records with fewer
    # than mf_width consecutive valid samples would lose all of their data.
    despiked = (
        calib_coeff.rolling({timevar: mf_width}, center=True, min_periods=1)
        .median()
        # Re-mask the originally missing samples so they do not contribute to
        # the mean and are filled with it below, rather than with a local median.
        .where(was_valid)
    )

    if bool(was_valid.any()):
        fill_value = float(despiked.mean(skipna=True))
        self.logger.info(
            "Filling NaN values in hv_step_calibration_coefficient "
            "with mean of despiked data: %.2f",
            fill_value,
        )
    else:
        # Nothing measured at all: the output is entirely synthetic, so make
        # that visible at warning level instead of info.
        fill_value = fallback_value
        self.logger.warning(
            "No valid data for hv_step_calibration_coefficient, "
            "filling all NaN values with constant: %.1f",
            fill_value,
        )

    # Rebuild the array explicitly so the result carries only the time
    # coordinate and the original attributes, as callers already expect.
    return xr.DataArray(
        despiked.fillna(fill_value).values,
        dims=[timevar],
        coords={timevar: calib_coeff.coords[timevar]},
        attrs=calib_coeff.attrs,
    )
Replaces the 2D array with a 1D 60Hz time series, analogous to how @@ -804,6 +860,15 @@ def _expand_ubat_to_60hz(self) -> None: len(ubat_time), ) + # Apply special handling for NaN values in calibration coefficient + # Use median filter width of 3 (standard value for despiking) + mf_width = 3 + self.logger.info("Applying NaN handling to wetlabsubat_hv_step_calibration_coefficient") + calib_coeff = self._handle_hv_step_calibration_coefficient(calib_coeff, mf_width) + + # Save the filled calibration coefficient back to the combined dataset + self.combined_nc["wetlabsubat_hv_step_calibration_coefficient"] = calib_coeff + # Multiply raw 60 hz values by the calibration coefficient # Broadcasting: calib_coeff is (m,) and ubat_2d is (m, 60) # This multiplies each row of ubat_2d by the corresponding coefficient diff --git a/src/data/resample.py b/src/data/resample.py index 98296de..0413263 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -2275,6 +2275,7 @@ def process_command_line(self): if resamp.args.log_file: netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(resamp.args.log_file).parent}") nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc4") + resamp.auv_name = Path(resamp.args.log_file).parts[0] else: file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc4" nc_file = Path( @@ -2285,7 +2286,7 @@ def process_command_line(self): file_name, ) p_start = time.time() - # Everything that Resampler needs should be in the self described nc_file + # Everything that Resampler needs should be in the self-described nc_file # whether it is Dorado/i2MAP or LRAUV resamp.resample_align_file( nc_file, diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index d286ea9..26c2479 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. 
# If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 63131 + EXPECTED_SIZE_GITHUB = 63132 EXPECTED_SIZE_ACT = 63101 EXPECTED_SIZE_LOCAL = 64637 if str(proc.args.base_path).startswith("/home/runner"): diff --git a/uv.lock b/uv.lock index d0144cd..3e4d32d 100644 --- a/uv.lock +++ b/uv.lock @@ -192,6 +192,7 @@ dependencies = [ { name = "jupyter-bokeh" }, { name = "netcdf4" }, { name = "numpy" }, + { name = "pandas" }, { name = "pooch" }, { name = "pyarrow" }, { name = "pygmt" }, @@ -227,6 +228,7 @@ requires-dist = [ { name = "jupyter-bokeh", specifier = ">=4.0.5" }, { name = "netcdf4", specifier = ">=1.7.2" }, { name = "numpy", specifier = ">=2.2.6" }, + { name = "pandas", specifier = ">=2.2.0" }, { name = "pooch", specifier = ">=1.8.2" }, { name = "pyarrow", specifier = ">=20.0.0" }, { name = "pygmt", specifier = "==0.16" },