From 162c998c1b6fbd29720dc909ac4930dd9eb2e090 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 8 Dec 2025 14:36:30 -0800 Subject: [PATCH 01/12] Convert xarray value to float. --- src/data/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/utils.py b/src/data/utils.py index 5c74ae2..45ebbc5 100644 --- a/src/data/utils.py +++ b/src/data/utils.py @@ -332,11 +332,11 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 ) logger.warning( " max(abs(lon)) = %s", - np.max(np.abs(lon[segi] + lon_nudge)), + float(np.max(np.abs(lon[segi] + lon_nudge))), ) logger.warning( " max(abs(lat)) = %s", - np.max(np.abs(lat[segi] + lat_nudge)), + float(np.max(np.abs(lat[segi] + lat_nudge))), ) lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) From 86a62fd78fadea82559b72221d14128cd3ba2385 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 8 Dec 2025 14:37:08 -0800 Subject: [PATCH 02/12] Add a log_file to be QC'ed for Monterey Bay positions. --- .vscode/launch.json | 5 ++++- src/data/combine.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 019a6db..a0c64c0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -134,7 +134,10 @@ // Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"] // Fails in nudge_positions, maybe bad GPS data? - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250911T073640/202509110736_202509120809.nc4", "--plot"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250911T073640/202509110736_202509120809.nc4", "--plot"] + // Nudged coordinate is way out of reasonable range - segment 15 + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250913T080940/202509130809_202509140809.nc4", "--plot"] + }, { diff --git a/src/data/combine.py b/src/data/combine.py index 2bc4aa9..3c46552 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -744,6 +744,7 @@ def _initial_coordinate_qc(self) -> None: """Perform initial QC on core coordinate variables for specific log files.""" if self.log_file in ( "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", + "brizo/missionlogs/2025/20250909_20250915/20250913T080940/202509130809_202509140809.nc4", ): self.logger.info("Performing initial coordinate QC for %s", self.log_file) self._range_qc_combined_nc( From 26ae26eee085495bdaaf810a5bb5c518e91e7fa2 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 12:00:51 -0800 Subject: [PATCH 03/12] For ubat add "_60hz" to group_name, improve sample_rate calculation. --- src/data/align.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 046954a..5eeeb2b 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -522,7 +522,7 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915 # Process group-based variables (skip coordinate variables) for variable in self.combined_nc: # Skip time coordinate variables - if variable.endswith("_time"): + if variable.endswith(("_time", "_time_60hz")): continue # Skip the navigation coordinate variables themselves @@ -551,6 +551,7 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915 # Extract group name from time coordinate if timevar.endswith("_time_60hz"): group_name = timevar[:-10] # Remove "_time_60hz" (10 chars) + group_name += "_60hz" elif timevar.endswith("_time"): group_name = timevar[:-5] # Remove "_time" else: @@ -563,9 +564,11 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915 # Get the time index for this variable var_time = self.combined_nc[variable].get_index(timevar).view(np.int64).tolist() - # Calculate sampling rate + # Calculate sampling rate using a small sample of time differences + SAMPLE_RATE_SAMPLE = 10 sample_rate = 1.0 / ( - np.mean(np.diff(self.combined_nc[timevar])) / np.timedelta64(1, "s") + np.mean(np.diff(self.combined_nc[timevar][:SAMPLE_RATE_SAMPLE])) + / np.timedelta64(1, "s") ) # Determine coordinate variable names based on group From 1484f7554b8704b4ce5b39b8e181fa0e392f3729 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 12:03:30 -0800 Subject: [PATCH 04/12] Get UBAT's hv_step_calibration_coefficient and multiply by digitized_raw_ad_counts to get 60 hz photons/sec. --- src/data/combine.py | 37 ++++++++++++++++++++++++++++++++++++- src/data/nc42netcdfs.py | 2 ++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/data/combine.py b/src/data/combine.py index 3c46552..3680c1c 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -678,6 +678,41 @@ def _expand_ubat_to_60hz(self) -> None: time_dim = ubat_2d.dims[0] n_samples = ubat_2d.shape[1] + # Get calibration coefficient and verify dimensions match + calib_coeff = self.combined_nc["wetlabsubat_hv_step_calibration_coefficient"] + calib_time_dim = calib_coeff.dims[0] + + # Verify the time dimensions are identical in length + if len(self.combined_nc[time_dim]) != len(self.combined_nc[calib_time_dim]): + error_message = ( + f"Dimension mismatch: {time_dim} has {len(self.combined_nc[time_dim])} elements " + f"but {calib_time_dim} has {len(self.combined_nc[calib_time_dim])} elements" + ) + raise ValueError(error_message) + + # Verify the time coordinate values are identical + if not np.allclose( + self.combined_nc[time_dim].to_numpy(), + self.combined_nc[calib_time_dim].to_numpy(), + rtol=1e-9, + ): + error_message = ( + f"Time coordinates {time_dim} and {calib_time_dim} have different values" + ) + raise ValueError(error_message) + + self.logger.info( + "Verified dimensions match: %s and %s both have %d elements", + time_dim, + calib_time_dim, + len(self.combined_nc[time_dim]), + ) + + # Multiply raw 60 hz values by the calibration coefficient + # Broadcasting: calib_coeff is (m,) and ubat_2d is (m, 60) + # This multiplies each row of ubat_2d by the corresponding coefficient + ubat_2d_calibrated = ubat_2d * calib_coeff.to_numpy()[:, np.newaxis] + # Get the time coordinate time_coord = self.combined_nc[time_dim] n_times = len(time_coord) @@ -699,7 +734,7 @@ def _expand_ubat_to_60hz(self) -> None: # Flatten the arrays time_60hz = np.concatenate(time_60hz_list) - data_60hz = ubat_2d.to_numpy().flatten() + data_60hz = ubat_2d_calibrated.to_numpy().flatten() # Remove the old 2D variable del self.combined_nc[ubat_var] diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 844df80..925dc1c 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -77,6 +77,8 @@ {"name": "average_bioluminescence"}, {"name": "flow_rate"}, {"name": "digitized_raw_ad_counts"}, + {"name": "hv_step_calibration_coefficient"}, + {"name": "flowrateCalibCoeff"}, ], } From 91e99d2d2714079e3318626131211c1e6357330e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 12:05:52 -0800 Subject: [PATCH 05/12] wetlabsubat_flow_rate is in l/s, so no * 1000 needed as it is on dorado. --- .vscode/launch.json | 5 +++-- src/data/resample.py | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index a0c64c0..ef04c2c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -369,7 +369,7 @@ // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025 //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable - //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] @@ -383,7 +383,8 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250921T001456/202509210014_202509221430.nc4", "--no_cleanup"] // A depth coordinate was not found for instrument navigation in //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250903_20250903/20250903T175939/202509031759_202509032026.nc4", "--no_cleanup"] - "args": ["-v", "1", "--auv_name", "brizo", "--start", "20250901T000000", "--end", "20251001T000000", "--no_cleanup"] + // For loading stoqs_lrauv_sep2025 - which has really bad nav in _2S_scieng.nc files + //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250901T000000", "--end", "20251001T000000", "--no_cleanup"] }, ] diff --git a/src/data/resample.py b/src/data/resample.py index b4f7569..486a991 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -1220,7 +1220,8 @@ def add_wetlabsubat_proxies( # noqa: PLR0913, PLR0915, C901, PLR0912 nighttime_bg_biolume = ( pd.Series(s_min_bg, index=nighttime_ubat_raw.index).resample("1s").mean() ) - nighttime_bg_biolume_perliter = nighttime_bg_biolume.divide(flow) * 1000 + # wetlabsubat_flow_rate is in l/s, so no * 1000 needed here + nighttime_bg_biolume_perliter = nighttime_bg_biolume.divide(flow) pseudo_fluorescence = nighttime_bg_biolume_perliter / proxy_ratio_adinos self.df_r["wetlabsubat_proxy_adinos"] = ( np.minimum(fluo, pseudo_fluorescence) / proxy_cal_factor @@ -1302,8 +1303,8 @@ def select_nighttime_ubat_raw( nighttime_data = ( self.ds["wetlabsubat_digitized_raw_ad_counts"] .where( - (self.ds["wetlabsubat_time_60hz"] > sunset) - & (self.ds["wetlabsubat_time_60hz"] < sunrise), + (self.ds["wetlabsubat_digitized_raw_ad_counts_time_60hz"] > sunset) + & (self.ds["wetlabsubat_digitized_raw_ad_counts_time_60hz"] < sunrise), ) .to_pandas() .dropna() From 77f280f9558eddfc5155e4248957d6b8545b0eb8 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 13:10:02 -0800 Subject: [PATCH 06/12] Return early if no s_ubat_raw data, generalize time coord name for wetlabsubat_digitized_raw_ad_counts. --- .vscode/launch.json | 6 +++--- src/data/resample.py | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ef04c2c..d68bb65 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -369,10 +369,10 @@ // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025 //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable - "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] + //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] - //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] + "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250707T043011/slate.nc4", "--no_cleanup"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250903_20250903/20250903T202626/202509032026_202509032030.nc4", "--no_cleanup"] // Does not have ctdseabird data, hence no coordinates for resampling diff --git a/src/data/resample.py b/src/data/resample.py index 486a991..d47bf1b 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -1006,6 +1006,10 @@ def add_wetlabsubat_proxies( # noqa: PLR0913, PLR0915, C901, PLR0912 # s_ubat_raw includes daytime data - see below for nighttime data s_ubat_raw = self.ds["wetlabsubat_digitized_raw_ad_counts"].to_pandas().dropna() + if s_ubat_raw.empty: + self.logger.info("No wetlabsubat_digitized_raw_ad_counts data to compute proxies") + return pd.Series(dtype="float64"), [], [] + # Compute background biolumenesence envelope self.logger.debug("Applying rolling min filter") min_bg_unsmoothed = s_ubat_raw.rolling( @@ -1300,11 +1304,11 @@ def select_nighttime_ubat_raw( sunset, sunrise, ) + time_coord_name = self.ds["wetlabsubat_digitized_raw_ad_counts"].dims[0] nighttime_data = ( self.ds["wetlabsubat_digitized_raw_ad_counts"] .where( - (self.ds["wetlabsubat_digitized_raw_ad_counts_time_60hz"] > sunset) - & (self.ds["wetlabsubat_digitized_raw_ad_counts_time_60hz"] < sunrise), + (self.ds[time_coord_name] > sunset) & (self.ds[time_coord_name] < sunrise), ) .to_pandas() .dropna() From ea4b4e23838e307d81d97de8abe4b4e9443c6737 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 13:10:21 -0800 Subject: [PATCH 07/12] Subtract 0.5 seconds because 60Hz data are logged at the end of the 1-second period. --- src/data/combine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/data/combine.py b/src/data/combine.py index 3680c1c..ca8d74c 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -722,7 +722,8 @@ def _expand_ubat_to_60hz(self) -> None: # Calculate 60hz time offsets (assuming samples span 1 second) # Each sample is 1/60th of a second apart - sample_offsets = np.arange(n_samples) / 60.0 + # Subtract 0.5 seconds because 60Hz data are logged at the end of the 1-second period + sample_offsets = np.arange(n_samples) / 60.0 - 0.5 # Create 60hz time series by adding offsets to each 1Hz time time_60hz_list = [] From 2e173b1d478697f314c49b5d91e0af99a1fbd60c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 13:27:55 -0800 Subject: [PATCH 08/12] Add wetlabsubat_hv_step_calibration_coefficient to mocked data. --- src/data/test_process_lrauv.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/data/test_process_lrauv.py b/src/data/test_process_lrauv.py index 06e298b..49b44d2 100644 --- a/src/data/test_process_lrauv.py +++ b/src/data/test_process_lrauv.py @@ -227,6 +227,10 @@ def test_ubat_60hz_expansion(tmp_path): ["wetlabsubat_time", "sample"], np.random.randint(0, 1000, (len(time_vals), 60)), ), + "wetlabsubat_hv_step_calibration_coefficient": ( + ["wetlabsubat_time"], + np.ones(len(time_vals)) * 1.5, # Example calibration coefficient + ), }, coords={"wetlabsubat_time": time_seconds}, ) @@ -236,6 +240,10 @@ def test_ubat_60hz_expansion(tmp_path): "long_name": "Digitized raw AD counts", "comment": "Test UBAT data", } + combine.combined_nc["wetlabsubat_hv_step_calibration_coefficient"].attrs = { + "long_name": "HV step calibration coefficient", + "comment": "Test calibration coefficient", + } # Call the expansion method combine._expand_ubat_to_60hz() From 6aacbabaee206b87d8a99669055decce393a014d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 13:29:06 -0800 Subject: [PATCH 09/12] Add _align_ubat_time_coordinates(). Fixes this exception: ValueError: Dimension mismatch: wetlabsubat_digitized_raw_ad_counts_time has 33154 elements but wetlabsubat_hv_step_calibration_coefficient_time has 33155 elements --- .vscode/launch.json | 4 ++- src/data/combine.py | 76 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 17 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index d68bb65..452912c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -370,8 +370,10 @@ //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] + // ValueError: Dimension mismatch: wetlabsubat_digitized_raw_ad_counts_time has 33154 elements but wetlabsubat_hv_step_calibration_coefficient_time has 33155 elements + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250626T041517/202506260415_202506261400.nc4", "--no_cleanup"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250707T043011/slate.nc4", "--no_cleanup"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250903_20250903/20250903T202626/202509032026_202509032030.nc4", "--no_cleanup"] diff --git a/src/data/combine.py b/src/data/combine.py index ca8d74c..ed67dbf 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -92,6 +92,7 @@ class Combine_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) variable_time_coord_mapping: dict = {} + TIME_MATCH_TOLERANCE = 1e-6 # seconds tolerance for matching time values def __init__( self, @@ -652,6 +653,41 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) + def _align_ubat_time_coordinates(self, ubat_2d, calib_coeff, time_dim, calib_time_dim): + """Align UBAT and calibration coefficient time coordinates by finding common times.""" + ubat_time = self.combined_nc[time_dim].to_numpy() + calib_time = self.combined_nc[calib_time_dim].to_numpy() + + # Find intersection of time values + common_indices_ubat = [] + common_indices_calib = [] + + for i, t_ubat in enumerate(ubat_time): + # Find matching time in calib_time + matches = np.where(np.abs(calib_time - t_ubat) < self.TIME_MATCH_TOLERANCE)[0] + if len(matches) > 0: + common_indices_ubat.append(i) + common_indices_calib.append(matches[0]) + + if len(common_indices_ubat) == 0: + error_message = f"No common time values found between {time_dim} and {calib_time_dim}" + raise ValueError(error_message) + + self.logger.info( + "Found %d common time values out of %d in %s and %d in %s", + len(common_indices_ubat), + len(ubat_time), + time_dim, + len(calib_time), + calib_time_dim, + ) + + # Subset both arrays to common times + ubat_2d_aligned = ubat_2d.isel({time_dim: common_indices_ubat}) + calib_coeff_aligned = calib_coeff.isel({calib_time_dim: common_indices_calib}) + + return ubat_2d_aligned, calib_coeff_aligned + def _expand_ubat_to_60hz(self) -> None: """Expand UBAT digitized_raw_ad_counts 2D array into 60hz time series. @@ -682,22 +718,30 @@ def _expand_ubat_to_60hz(self) -> None: calib_coeff = self.combined_nc["wetlabsubat_hv_step_calibration_coefficient"] calib_time_dim = calib_coeff.dims[0] - # Verify the time dimensions are identical in length - if len(self.combined_nc[time_dim]) != len(self.combined_nc[calib_time_dim]): - error_message = ( - f"Dimension mismatch: {time_dim} has {len(self.combined_nc[time_dim])} elements " - f"but {calib_time_dim} has {len(self.combined_nc[calib_time_dim])} elements" + # Handle dimension mismatch by finding common time values + ubat_time = self.combined_nc[time_dim].to_numpy() + calib_time = self.combined_nc[calib_time_dim].to_numpy() + + if len(ubat_time) != len(calib_time): + self.logger.warning( + "Dimension mismatch: %s has %d elements but %s has %d elements - " + "finding common time values", + time_dim, + len(ubat_time), + calib_time_dim, + len(calib_time), ) - raise ValueError(error_message) + ubat_2d, calib_coeff = self._align_ubat_time_coordinates( + ubat_2d, calib_coeff, time_dim, calib_time_dim + ) + ubat_time = ubat_2d.coords[time_dim].to_numpy() + calib_time = calib_coeff.coords[calib_time_dim].to_numpy() - # Verify the time coordinate values are identical - if not np.allclose( - self.combined_nc[time_dim].to_numpy(), - self.combined_nc[calib_time_dim].to_numpy(), - rtol=1e-9, - ): + # Verify the time coordinate values are now identical + if not np.allclose(ubat_time, calib_time, rtol=1e-9): error_message = ( - f"Time coordinates {time_dim} and {calib_time_dim} have different values" + f"Time coordinates {time_dim} and {calib_time_dim} have different values " + "even after alignment" ) raise ValueError(error_message) @@ -705,7 +749,7 @@ def _expand_ubat_to_60hz(self) -> None: "Verified dimensions match: %s and %s both have %d elements", time_dim, calib_time_dim, - len(self.combined_nc[time_dim]), + len(ubat_time), ) # Multiply raw 60 hz values by the calibration coefficient @@ -713,8 +757,8 @@ def _expand_ubat_to_60hz(self) -> None: # This multiplies each row of ubat_2d by the corresponding coefficient ubat_2d_calibrated = ubat_2d * calib_coeff.to_numpy()[:, np.newaxis] - # Get the time coordinate - time_coord = self.combined_nc[time_dim] + # Get the time coordinate (use ubat_2d's time coordinate after alignment) + time_coord = ubat_2d.coords[time_dim] n_times = len(time_coord) # Save original attributes before removing From f14e7c927cf70fe6cb57efd5fbeb651dd7869cf5 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 13:33:31 -0800 Subject: [PATCH 10/12] Update EXPECTED_MD5_GITHUB and testing June 2025 pontus run. --- .vscode/launch.json | 4 ++-- src/data/test_process_dorado.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 452912c..360ed41 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -371,9 +371,9 @@ // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] // ValueError: Dimension mismatch: wetlabsubat_digitized_raw_ad_counts_time has 33154 elements but wetlabsubat_hv_step_calibration_coefficient_time has 33155 elements - "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250626T041517/202506260415_202506261400.nc4", "--no_cleanup"] + //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250626T041517/202506260415_202506261400.nc4", "--no_cleanup"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--no_cleanup"] + "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250707T043011/slate.nc4", "--no_cleanup"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250903_20250903/20250903T202626/202509032026_202509032030.nc4", "--no_cleanup"] diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 35520db..88217bb 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "432690b72faf604e9845cfe6c3eb5b3e" + EXPECTED_MD5_GITHUB = "fec067d16eb5280a8bc2b6ef132821b8" EXPECTED_MD5_ACT = "316955fd489862ad9ed5b63df8aa7db8" EXPECTED_MD5_LOCAL = "f635cee8760aa0f40bd2070bc0c5fa65" if str(proc.args.base_path).startswith("/home/runner"): From 2cf2c9a9dc1851e84542a18071dc6a612123d86b Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 14:04:50 -0800 Subject: [PATCH 11/12] Return early if "wetlabsubat_hv_step_calibration_coefficient" not in self.combined_nc. --- src/data/combine.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/data/combine.py b/src/data/combine.py index ed67dbf..43ced62 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -714,6 +714,10 @@ def _expand_ubat_to_60hz(self) -> None: time_dim = ubat_2d.dims[0] n_samples = ubat_2d.shape[1] + if "wetlabsubat_hv_step_calibration_coefficient" not in self.combined_nc: + self.logger.warning("No UBAT calibration coefficient found, skipping 60hz expansion") + return + # Get calibration coefficient and verify dimensions match calib_coeff = self.combined_nc["wetlabsubat_hv_step_calibration_coefficient"] calib_time_dim = calib_coeff.dims[0] From c1303708c65041a435da44c7191e061d448bf834 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 9 Dec 2025 15:37:28 -0800 Subject: [PATCH 12/12] If no UBAT calibration coefficient found, remove 2D variable to avoid confusion downstream. --- .vscode/launch.json | 4 +++- src/data/combine.py | 37 ++++++++++++++++++++++++------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 360ed41..6f95cea 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -372,8 +372,10 @@ //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup", "--clobber"] // ValueError: Dimension mismatch: wetlabsubat_digitized_raw_ad_counts_time has 33154 elements but wetlabsubat_hv_step_calibration_coefficient_time has 33155 elements //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250626T041517/202506260415_202506261400.nc4", "--no_cleanup"] + // ValueError: coords is not dict-like, but it has 1 items, which does not match the 2 dimensions of the data + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250626T140000/202506261400_202506262031.nc4", "--no_cleanup"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250721T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250623_20250707/20250707T043011/slate.nc4", "--no_cleanup"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250903_20250903/20250903T202626/202509032026_202509032030.nc4", "--no_cleanup"] diff --git a/src/data/combine.py b/src/data/combine.py index 43ced62..663c712 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -693,6 +693,9 @@ def _expand_ubat_to_60hz(self) -> None: Replaces the 2D array with a 1D 60Hz time series, analogous to how Dorado biolume_raw is stored with a time60hz coordinate. + + If expansion cannot be performed (e.g., not 2D data), the 2D variable + is removed to avoid confusing downstream consumers. """ ubat_var = "wetlabsubat_digitized_raw_ad_counts" @@ -714,8 +717,18 @@ def _expand_ubat_to_60hz(self) -> None: time_dim = ubat_2d.dims[0] n_samples = ubat_2d.shape[1] + self.logger.info( + "UBAT data shape: %s, time dimension: %s with %d points", + ubat_2d.shape, + time_dim, + len(self.combined_nc[time_dim]), + ) + if "wetlabsubat_hv_step_calibration_coefficient" not in self.combined_nc: - self.logger.warning("No UBAT calibration coefficient found, skipping 60hz expansion") + self.logger.warning( + "No UBAT calibration coefficient found, removing 2D variable to avoid confusion" + ) + del self.combined_nc[ubat_var] return # Get calibration coefficient and verify dimensions match @@ -788,27 +801,25 @@ def _expand_ubat_to_60hz(self) -> None: # Remove the old 2D variable del self.combined_nc[ubat_var] - # Create new 60hz time coordinate with attributes + # Create new 60hz time coordinate name time_60hz_name = f"{time_dim}_60hz" - time_60hz_coord = xr.DataArray( - time_60hz, - dims=[time_60hz_name], - name=time_60hz_name, - attrs={ - "units": "seconds since 1970-01-01T00:00:00Z", - "standard_name": "time", - "long_name": "Time at 60Hz sampling rate", - }, - ) # Create replacement 1D variable with 60hz time coordinate + # Pass the numpy array as the coordinate value, not a DataArray self.combined_nc[ubat_var] = xr.DataArray( data_60hz, - coords={time_60hz_name: time_60hz_coord}, + coords={time_60hz_name: time_60hz}, dims=[time_60hz_name], name=ubat_var, ) + # Add attributes to the time coordinate + self.combined_nc[ubat_var].coords[time_60hz_name].attrs = { + "units": "seconds since 1970-01-01T00:00:00Z", + "standard_name": "time", + "long_name": "Time at 60Hz sampling rate", + } + # Restore and update attributes self.combined_nc[ubat_var].attrs = original_attrs self.combined_nc[ubat_var].attrs["long_name"] = "UBAT digitized raw AD counts at 60Hz"