From 4f394569d53703cd742cebee660cb36289acc327 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 12:55:14 -0800 Subject: [PATCH 1/7] Add nudged_ variables, add variable_time_coord_mapping to global metadata. Need variable_time_coord_mapping in metadata so that align.py can use it for setting coordinates attribute for each variable. --- src/data/combine.py | 52 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 0f04e03d..a1f40b70 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -39,6 +39,7 @@ __copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" import argparse # noqa: I001 +import json import logging import sys import time @@ -89,6 +90,7 @@ class Combine_NetCDF: _handler.setFormatter(AUV_NetCDF._formatter) logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + variable_time_coord_mapping: dict = {} def global_metadata(self): """Use instance variables to return a dictionary of @@ -119,6 +121,7 @@ def global_metadata(self): metadata["license"] = metadata["distribution_statement"] metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." 
metadata["history"] = f"Created by {self.commandline} on {iso_now}" + metadata["variable_time_coord_mapping"] = json.dumps(self.variable_time_coord_mapping) log_file = self.args.log_file metadata["title"] = ( f"Combined LRAUV data from {log_file} - relevant variables extracted for STOQS" @@ -339,6 +342,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic - consolidated_time_name: name of consolidated coordinate (or None) - consolidated_time_data: the time coordinate data (or None) - time_coord_mapping: dict mapping original dims to consolidated dims + - variable_time_coord_mapping: dict mapping variables to their time coords """ # Find all time variables in this group time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("time")} @@ -348,6 +352,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic "consolidated_time_name": None, "consolidated_time_data": None, "time_coord_mapping": {}, + "variable_time_coord_mapping": {}, } if len(time_vars) == 1: @@ -360,10 +365,15 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic time_name, consolidated_name, ) + time_coord_mapping = {time_name: consolidated_name} return { "consolidated_time_name": consolidated_name, "consolidated_time_data": ds[time_name], - "time_coord_mapping": {time_name: consolidated_name}, + "time_coord_mapping": time_coord_mapping, + "variable_time_coord_mapping": { + f"{group_name}_{k.split('_time')[0].lower()}": v + for k, v in time_coord_mapping.items() + }, } # Multiple time coordinates - check if they're identical @@ -427,6 +437,10 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic "consolidated_time_name": consolidated_name, "consolidated_time_data": ds[first_time_name], "time_coord_mapping": time_coord_mapping, + "variable_time_coord_mapping": { + f"{group_name}_{k.split('_time')[0].lower()}": consolidated_name + for k in time_vars + }, } # Time 
coordinates differ - keep them separate @@ -442,6 +456,10 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic "consolidated_time_name": None, "consolidated_time_data": None, "time_coord_mapping": time_coord_mapping, + "variable_time_coord_mapping": { + f"{group_name}_{k.split('_time')[0].lower()}": v + for k, v in time_coord_mapping.items() + }, } def _add_time_coordinates_to_combined(self, time_info: dict, ds: xr.Dataset) -> None: @@ -499,6 +517,7 @@ def _create_data_array_for_variable( ) data_array.attrs = ds[orig_var].attrs.copy() data_array.attrs["units"] = "degrees" + data_array.attrs["coordinates"] = f"{dim_name}" else: data_array = xr.DataArray( ds[orig_var].to_numpy(), @@ -506,6 +525,8 @@ def _create_data_array_for_variable( coords={dim_name: time_coord_data}, ) data_array.attrs = ds[orig_var].attrs.copy() + data_array.attrs["comment"] = f"{orig_var} from group {ds.attrs.get('group_name', '')}" + data_array.attrs["coordinates"] = f"{dim_name}" return data_array def _add_time_metadata_to_variable(self, var_name: str, dim_name: str) -> None: @@ -585,19 +606,35 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: segment_count, segment_minsum, ) - self.combined_nc["nudged_longitude"] = nudged_longitude + self.combined_nc["nudged_longitude"] = xr.DataArray( + nudged_longitude, + coords=[self.combined_nc["universals_time"].to_numpy()], + dims={f"nudged_{TIME}"}, + name="nudged_longitude", + ) self.combined_nc["nudged_longitude"].attrs = { "long_name": "Nudged Longitude", "standard_name": "longitude", "units": "degrees_east", - "comment": "Dead reckoned longitude nudged to GPS positions", + "comment": ( + f"Dead reckoned positions from {segment_count} underwater segments " + f"nudged to GPS positions" + ), } - self.combined_nc["nudged_latitude"] = nudged_latitude + self.combined_nc["nudged_latitude"] = xr.DataArray( + nudged_latitude, + coords=[self.combined_nc["universals_time"].to_numpy()], + 
dims={f"nudged_{TIME}"}, + name="nudged_latitude", + ) self.combined_nc["nudged_latitude"].attrs = { "long_name": "Nudged Latitude", "standard_name": "latitude", "units": "degrees_north", - "comment": "Dead reckoned latitude nudged to GPS positions", + "comment": ( + f"Dead reckoned positions from {segment_count} underwater segments " + f"nudged to GPS positions" + ), } def combine_groups(self): @@ -625,6 +662,9 @@ def combine_groups(self): # Add consolidation comment if applicable self._add_consolidation_comment(time_info) + # Collect variable coordinate mapping by group, which can be flattened + self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"]) + # Write intermediate file for cf_xarray decoding intermediate_file = self._intermediate_write_netcdf() with xr.open_dataset(intermediate_file, decode_cf=True) as ds: @@ -634,7 +674,7 @@ def combine_groups(self): self._add_nudged_coordinates() # Clean up intermediate file - ##Path(intermediate_file).unlink() + Path(intermediate_file).unlink() def _intermediate_write_netcdf(self) -> None: """Write out an intermediate combined netCDF file so that data can be From 63af3a1775cfe38519e8265339a1c06338cf0571 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 12:55:43 -0800 Subject: [PATCH 2/7] Add group_name to the global metadata. 
--- src/data/nc42netcdfs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index ed778f82..f4f5f51d 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -1021,6 +1021,7 @@ def global_metadata(self, log_file: str, group_name: str): f" using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) + metadata["group_name"] = group_name metadata["summary"] = ( "Observational oceanographic data obtained from a Long Range Autonomous" " Underwater Vehicle mission with measurements at original sampling" From a6880c0260147811caf3a209dc1ae4a0be814771 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 16:49:16 -0800 Subject: [PATCH 3/7] Fixup metadata - use mapping from global variable_time_coord_mapping attribute for coordinates. --- src/data/align.py | 154 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 116 insertions(+), 38 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 703c0edb..81cf28fe 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -13,6 +13,7 @@ __copyright__ = "Copyright 2021, Monterey Bay Aquarium Research Institute" import argparse +import json import logging import os import re @@ -113,10 +114,17 @@ def global_metadata(self): f" host {actual_hostname} using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) + metadata["summary"] = ( + "Observational oceanographic data obtained from an Autonomous" + " Underwater Vehicle mission with measurements at" + " original sampling intervals. The data have been calibrated" + " and the coordinate variables aligned using MBARI's auv-python" + " software." 
+ ) elif self.args.log_file: metadata["title"] = ( f"Combined and aligned LRAUV instrument data from" - f" log file {Path(self.args.log_file).name}" + f" log file {Path(self.args.log_file)}" ) from_data = "combined data" metadata["source"] = ( @@ -125,32 +133,38 @@ def global_metadata(self): f" host {actual_hostname} using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) - metadata["summary"] = ( - "Observational oceanographic data obtained from an Autonomous" - " Underwater Vehicle mission with measurements at" - " original sampling intervals. The data have been calibrated" - " and the coordinate variables aligned using MBARI's auv-python" - " software." - ) + metadata["summary"] = ( + "Observational oceanographic data obtained from an Autonomous" + " Underwater Vehicle mission with measurements at" + " original sampling intervals. The position variables have been" + " corrected to GPS positions and aligned with the data variables" + " using MBARI's auv-python software." + ) # Append location of original data files to summary if self.args.auv_name and self.args.mission: matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.calibrated_nc.attrs["summary"], ) + metadata["comment"] = ( + f"MBARI Dorado-class AUV data produced from calibrated data" + f" with execution of '{self.commandline}' at {iso_now} on" + f" host {gethostname()}. Software available at" + f" 'https://github.com/mbari-org/auv-python'" + ) elif self.args.log_file: matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.combined_nc.attrs["summary"], ) + metadata["comment"] = ( + f"MBARI LRAUV-class AUV data produced from logged data" + f" with execution of '{self.commandline}' at {iso_now} on" + f" host {gethostname()}. 
Software available at" + f" 'https://github.com/mbari-org/auv-python'" + ) if matches: metadata["summary"] += " " + matches.group(1) - metadata["comment"] = ( - f"MBARI Dorado-class AUV data produced from calibrated data" - f" with execution of '{self.commandline}' at {iso_now} on" - f" host {gethostname()}. Software available at" - f" 'https://github.com/mbari-org/auv-python'" - ) return metadata @@ -474,11 +488,8 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 "Processing %s with group %s and time %s", variable, group_name, timevar ) - # Copy the original variable - self.aligned_nc[variable] = self.combined_nc[variable] - # Get the time index for this variable - var_time = self.aligned_nc[variable].get_index(timevar).view(np.int64).tolist() + var_time = self.combined_nc[variable].get_index(timevar).view(np.int64).tolist() # Calculate sampling rate sample_rate = np.round( @@ -486,20 +497,6 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 decimals=2, ) - # Create aligned variable with proper attributes - self.aligned_nc[variable] = xr.DataArray( - self.combined_nc[variable].values, - dims={timevar}, - coords=[self.combined_nc[variable].get_index(timevar)], - name=variable, - ) - self.aligned_nc[variable].attrs = self.combined_nc[variable].attrs - self.aligned_nc[variable].attrs["coordinates"] = ( - f"{group_name}_time {group_name}_depth {group_name}_latitude {group_name}_longitude" - ) - self.logger.info("%s: instrument_sample_rate_hz = %.2f", variable, sample_rate) - self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = sample_rate - # Create interpolated coordinate variables for this group coord_names = ["depth", "latitude", "longitude"] coord_interps = [depth_interp, lat_interp, lon_interp] @@ -525,7 +522,7 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 self.aligned_nc[coord_var_name].attrs["long_name"] = coord_name.title() 
self.aligned_nc[coord_var_name].attrs["instrument_sample_rate_hz"] = sample_rate - if coord_name in ["latitude", "longitude"]: + if coord_name in ["longitude", "latitude", "depth"]: self.aligned_nc[coord_var_name].attrs["comment"] = ( self.aligned_nc[coord_var_name].attrs.get("comment", "") + f". Variable {coord_source} from {src_file} file linearly" @@ -542,11 +539,69 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 ) > pd.to_datetime(self.max_time): self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC) - # Update bounds using the interpolated coordinates - depth_coord = f"{group_name}_depth" - lat_coord = f"{group_name}_latitude" - lon_coord = f"{group_name}_longitude" + # Coordinates - use mapping from global variable_time_coord_mapping attribute + variable_time_coord_mapping = json.loads( + self.combined_nc.attrs.get("variable_time_coord_mapping", "{}") + ) + time_coord = variable_time_coord_mapping.get(variable) + depth_coord = ( + time_coord[:-5] + "_depth" + if time_coord and time_coord.endswith("_time") + else f"{group_name}_depth" + ) + lat_coord = ( + time_coord[:-5] + "_latitude" + if time_coord and time_coord.endswith("_time") + else f"{group_name}_latitude" + ) + lon_coord = ( + time_coord[:-5] + "_longitude" + if time_coord and time_coord.endswith("_time") + else f"{group_name}_longitude" + ) + + # Add interpolated depth, latitude, and longitude variables + if depth_coord in self.combined_nc: + self.aligned_nc[depth_coord].attrs = self.combined_nc[depth_coord].attrs + self.aligned_nc[depth_coord] = xr.DataArray( + depth_interp(var_time).astype(np.float64).tolist(), + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=depth_coord, + ) + self.aligned_nc[depth_coord].attrs["long_name"] = "Depth" + self.aligned_nc[depth_coord].attrs["comment"] = "depth from Group_Universals.nc" + self.aligned_nc[depth_coord].attrs["instrument_sample_rate_hz"] = sample_rate + 
self.aligned_nc[lat_coord] = xr.DataArray( + lat_interp(var_time).astype(np.float64).tolist(), + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=lat_coord, + ) + self.aligned_nc[lat_coord].attrs = self.combined_nc["nudged_latitude"].attrs + self.aligned_nc[lat_coord].attrs["comment"] += ( + f". Variable nudged_latitude from {src_file} file linearly" + f" interpolated onto {variable.split('_')[0]} time values." + ) + self.aligned_nc[lat_coord].attrs["long_name"] = "Latitude" + self.aligned_nc[lat_coord].attrs["instrument_sample_rate_hz"] = sample_rate + + self.aligned_nc[lon_coord] = xr.DataArray( + lon_interp(var_time).astype(np.float64).tolist(), + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=lon_coord, + ) + self.aligned_nc[lon_coord].attrs = self.combined_nc["nudged_longitude"].attrs + self.aligned_nc[lon_coord].attrs["comment"] += ( + f". Variable nudged_longitude from {src_file} file linearly" + f" interpolated onto {variable.split('_')[0]} time values." 
+ ) + self.aligned_nc[lon_coord].attrs["long_name"] = "Longitude" + self.aligned_nc[lon_coord].attrs["instrument_sample_rate_hz"] = sample_rate + + # Update bounds using the interpolated coordinates if self.aligned_nc[depth_coord].min() < self.min_depth: self.min_depth = self.aligned_nc[depth_coord].min().to_numpy() if self.aligned_nc[depth_coord].max() > self.max_depth: @@ -560,6 +615,29 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 if self.aligned_nc[lon_coord].max() > self.max_lon: self.max_lon = self.aligned_nc[lon_coord].max().to_numpy() + # Create aligned variable with proper attributes + self.aligned_nc[variable] = xr.DataArray( + self.combined_nc[variable].values, + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=variable, + ) + self.aligned_nc[variable].attrs = self.combined_nc[variable].attrs + if ( + time_coord in self.aligned_nc + and depth_coord in self.aligned_nc + and lat_coord in self.aligned_nc + and lon_coord in self.aligned_nc + ): + self.aligned_nc[variable].attrs["coordinates"] = ( + f"{time_coord} {depth_coord} {lat_coord} {lon_coord}" + ) + else: + self.logger.info("Skipping setting coordinates attribute for %s", variable) + + self.logger.info("%s: instrument_sample_rate_hz = %.2f", variable, sample_rate) + self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = sample_rate + return netcdfs_dir def write_combined_netcdf( @@ -595,7 +673,7 @@ def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: self.logger.debug("Removing file %s", out_fn) out_fn.unlink() self.aligned_nc.to_netcdf(out_fn) - self.logger.info( + self.logger.debug( "Data variables written: %s", ", ".join(sorted(self.aligned_nc.variables)), ) From 5b05b0dc69b23af44303f15fcbc16c2975a6d727 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 16:49:37 -0800 Subject: [PATCH 4/7] Testing process_lrauv. 
--- .vscode/launch.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ee820b82..64c6b5d0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -175,13 +175,14 @@ //"args": ["--auv_name", "dorado", "--mission", "2017.044.00", "-v", "1"] //"args": ["--auv_name", "dorado", "--mission", "2021.102.02", "-v", "1"] //"args": ["--auv_name", "dorado", "--mission", "2004.236.00", "-v", "1"] - "args": ["--auv_name", "dorado", "--mission", "2023.192.01", "-v", "1"] + //"args": ["--auv_name", "dorado", "--mission", "2023.192.01", "-v", "1"] //"args": ["--auv_name", "i2map", "--mission", "2019.157.02", "-v", "2", "--plot", "--plot_seconds", "82000"], //"args": ["--auv_name", "dorado", "--mission", "2021.102.02", "-v", "1", "--flash_threshold", "1.5e10"], //"args": ["--auv_name", "dorado", "--mission", "2024.317.01", "-v", "1"], //"args": ["--auv_name", "dorado", "--mission", "2010.341.00", "-v", "1", "--plot", "--plot_seconds", "82000"], //"args": ["--auv_name", "dorado", "--mission", "2020.337.00", "-v", "1"], //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"], + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] }, { "name": "5.0 - archive.py", @@ -333,7 +334,7 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] - "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] + "args": ["-v", "1", "--log_file", 
"brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] }, From e7ef537d8e7f00f80506c3fe258fa35a66d39b09 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 16:50:14 -0800 Subject: [PATCH 5/7] Get working for process_lrauv. --- src/data/process.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/data/process.py b/src/data/process.py index b3a6e2e1..5c0d83a4 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -308,6 +308,7 @@ def align(self, mission: str = "", log_file: str = "") -> None: align_netcdf.args.base_path = self.args.base_path align_netcdf.args.auv_name = self.vehicle align_netcdf.args.mission = mission + align_netcdf.args.log_file = self.args.log_file align_netcdf.args.plot = None align_netcdf.args.verbose = self.args.verbose align_netcdf.logger.setLevel(self._log_levels[self.args.verbose]) @@ -316,9 +317,10 @@ def align(self, mission: str = "", log_file: str = "") -> None: try: if log_file: netcdf_dir = align_netcdf.process_combined(log_file=log_file) + align_netcdf.write_combined_netcdf(netcdf_dir, log_file=log_file) else: netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_netcdf(netcdf_dir) + align_netcdf.write_netcdf(netcdf_dir, vehicle=self.vehicle, mission=mission) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 error_message = f"{mission} {e}" @@ -326,12 +328,13 @@ def align(self, mission: str = "", log_file: str = "") -> None: finally: align_netcdf.logger.removeHandler(self.log_handler) - def resample(self, mission: str) -> None: + def resample(self, mission: str = "") -> None: self.logger.info("Resampling steps for %s", mission) resamp = Resampler() resamp.args = argparse.Namespace() resamp.args.auv_name = self.vehicle resamp.args.mission = mission + 
resamp.args.log_file = self.args.log_file resamp.args.plot = None resamp.args.freq = self.args.freq resamp.args.mf_width = self.args.mf_width @@ -341,13 +344,17 @@ def resample(self, mission: str) -> None: resamp.logger.setLevel(self._log_levels[self.args.verbose]) resamp.logger.addHandler(self.log_handler) file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" - nc_file = Path( - self.args.base_path, - resamp.args.auv_name, - MISSIONNETCDFS, - resamp.args.mission, - file_name, - ) + if resamp.args.log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(resamp.args.log_file).parent) + nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc") + else: + nc_file = Path( + self.args.base_path, + resamp.args.auv_name, + MISSIONNETCDFS, + resamp.args.mission, + file_name, + ) if self.args.flash_threshold and self.args.resample: self.logger.info( "Executing only resample step to produce netCDF file with flash_threshold = %s", @@ -373,7 +380,7 @@ def resample(self, mission: str) -> None: try: resamp.resample_mission(nc_file) except FileNotFoundError as e: - self.logger.error("%s %s", mission, e) # noqa: TRY400 + self.logger.error("%s %s", nc_file, e) # noqa: TRY400 finally: resamp.logger.removeHandler(self.log_handler) @@ -763,6 +770,7 @@ def combine(self, log_file: str) -> None: ) combine = Combine_NetCDF() combine.args = argparse.Namespace() + combine.args.plot = None combine.args.verbose = self.args.verbose combine.args.log_file = self.args.log_file combine.commandline = self.commandline @@ -790,7 +798,7 @@ def process_log_file(self, log_file: str) -> None: netcdfs_dir = self.extract(log_file) self.combine(log_file=log_file) self.align(log_file=log_file) - # self.resample(log_file) + self.resample() # self.create_products(log_file) self.logger.info("Finished processing log file: %s", log_file) From be840a5a6e437662fbbf66e0b0a81a9708803c59 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 16:51:44 -0800 Subject: [PATCH 6/7] 
Modify for reuse with LRAUV _align.nc data. --- src/data/resample.py | 63 +++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/src/data/resample.py b/src/data/resample.py index 34653107..53bbd39c 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -27,6 +27,7 @@ import xarray as xr from dorado_info import dorado_info from logs2netcdfs import BASE_PATH, MISSIONNETCDFS, SUMMARY_SOURCE, TIME, AUV_NetCDF +from nc42netcdfs import BASE_LRAUV_PATH from pysolar.solar import get_altitude from scipy import signal @@ -1006,7 +1007,8 @@ def resample_variable( # noqa: PLR0913 instrs_to_pad: dict[str, timedelta], depth_threshold: float, ) -> None: - timevar = f"{instr}_{TIME}" + # Get the time variable name from the dimension of the variable + timevar = self.ds[variable].dims[0] if instr == "biolume" and variable == "biolume_raw": # Only biolume_avg_biolume and biolume_flow treated like other data # All other biolume variables in self.df_r[] are computed from biolume_raw @@ -1135,16 +1137,24 @@ def get_mission_start_end( mission_start = datetime.max # noqa: DTZ901 mission_end = datetime.min # noqa: DTZ901 instrs_to_pad = {} + self.logger.info("Determining mission start and end times") + time_coords = [] for instr in self.instruments_variables(nc_file): time_coord = f"{instr}_{TIME}" - mission_start = min(pd.to_datetime(self.ds[time_coord].min().values), mission_start) - mission_end = max(pd.to_datetime(self.ds[time_coord].max().values), mission_end) - for instr in self.instruments_variables(nc_file): - time_coord = f"{instr}_{TIME}" + try: + mission_start = min(pd.to_datetime(self.ds[time_coord].min().values), mission_start) + mission_end = max(pd.to_datetime(self.ds[time_coord].max().values), mission_end) + time_coords.append(time_coord) + except KeyError: + # Likely an LRAUV _combined.nc file with multiple different dimensions in a Group + self.logger.info( + "Ignoring expected time_coord that could not be 
found: %s", time_coord + ) + for time_coord in time_coords: duration = mission_end - pd.to_datetime(self.ds[time_coord].max().values) self.logger.info( "%-10s: %s to %s (%s before mission_end)", - instr, + time_coord.split("_")[0], self.ds[time_coord].min().values, self.ds[time_coord].max().values, duration, @@ -1152,10 +1162,10 @@ def get_mission_start_end( if mission_end - pd.to_datetime( self.ds[time_coord].max().values, ) > timedelta(minutes=min_crit): - instrs_to_pad[instr] = duration + instrs_to_pad[time_coord.split("_")[0]] = duration self.logger.warning( "Instrument %s has a gap > %d minutes at the end of the mission: %s", - instr, + time_coord.split("_")[0], min_crit, mission_end - pd.to_datetime(self.ds[time_coord].max().values), ) @@ -1199,8 +1209,10 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 # Use the pitch corrected depth coordinate for 'ctd1' for dorado, # 'seabird25p' for i2map. The depth coordinate for pitch_corrected_instr # must be as complete as possible as it's used for all the other - # nosecone instruments. + # nosecone instruments. If we are processing LRAUV data then + # use 'ctdseabird', otherwise start with 'ctd1' and fall back to + # 'seabird25p' if needed for i2map missions. 
+ pitch_corrected_instr = "ctdseabird" if self.args.log_file else "ctd1" if f"{pitch_corrected_instr}_depth" not in self.ds: pitch_corrected_instr = "seabird25p" if pitch_corrected_instr in instrs_to_pad: @@ -1322,6 +1334,15 @@ def process_command_line(self): help="Mission directory, e.g.: 2020.064.10", ), ) + parser.add_argument( + "--log_file", + action="store", + help=( + "Path to the log file of original LRAUV data, e.g.: " + "brizo/missionlogs/2025/20250903_20250909/" + "20250905T072042/202509050720_202509051653.nc4" + ), + ) parser.add_argument("--plot", action="store_true", help="Plot data") parser.add_argument( "--plot_seconds", @@ -1374,15 +1395,21 @@ def process_command_line(self): if __name__ == "__main__": resamp = Resampler() resamp.process_command_line() - file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" - nc_file = Path( - BASE_PATH, - resamp.args.auv_name, - MISSIONNETCDFS, - resamp.args.mission, - file_name, - ) + if resamp.args.log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(resamp.args.log_file).parent}") + nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc") + else: + file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" + nc_file = Path( + BASE_PATH, + resamp.args.auv_name, + MISSIONNETCDFS, + resamp.args.mission, + file_name, + ) p_start = time.time() + # Everything that Resampler needs should be in the self described nc_file + # whether it is Dorado/i2MAP or LRAUV resamp.resample_mission( nc_file, mf_width=resamp.args.mf_width, From 10a3c80e824414d940a4eebf54ede3413cfeaa83 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 17:09:33 -0800 Subject: [PATCH 7/7] Fix for the tests. 
--- src/data/conftest.py | 2 ++ src/data/process.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/data/conftest.py b/src/data/conftest.py index 4f08da02..fd181ce1 100644 --- a/src/data/conftest.py +++ b/src/data/conftest.py @@ -103,6 +103,7 @@ def complete_dorado_processing(): ns.skip_download_process = False ns.num_cores = 1 ns.add_seconds = None + ns.log_file = None ns.verbose = 1 proc.args = ns proc.process_missions(TEST_START_YEAR) @@ -149,6 +150,7 @@ def complete_i2map_processing(): ns.last_n_days = 0 ns.num_cores = 1 ns.add_seconds = None + ns.log_file = None ns.verbose = 1 proc.args = ns proc.process_missions(TEST_START_YEAR) diff --git a/src/data/process.py b/src/data/process.py index 5c0d83a4..cc0a190f 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -320,7 +320,7 @@ def align(self, mission: str = "", log_file: str = "") -> None: align_netcdf.write_combined_netcdf(netcdf_dir, log_file=log_file) else: netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_netcdf(netcdf_dir, vehicle=self.vehicle, mission=mission) + align_netcdf.write_combined_netcdf(netcdf_dir, vehicle=self.vehicle) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 error_message = f"{mission} {e}"