From c9f58d6db4f5341683ca180f50fe6a0bbd62c396 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 08:56:41 -0800 Subject: [PATCH 01/28] Remove unused methods, override coordinates attribute with just '_time' as these are time series, not yet trajectory data. --- src/data/nc42netcdfs.py | 41 +++-------------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 42d5e672..97cc3955 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -189,11 +189,6 @@ def download_with_pooch(self, url, local_dir, known_hash=None): downloader=downloader, ) - def get_groups_netcdf4(self, file_path): - """Get list of groups using netCDF4 library.""" - with netCDF4.Dataset(file_path, "r") as dataset: - return list(dataset.groups.keys()) - def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: """Extract each group from .nc4 file to a separate .nc file using netCDF4 library. @@ -714,6 +709,8 @@ def _copy_variable_with_appropriate_time_filter( # Copy attributes for attr_name in src_var.ncattrs(): dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + # override any coordinates attribute with just the time coordinate + dst_var.setncattr("coordinates", var_name + "_time") self.logger.debug(" Copied variable: %s", var_name) @@ -872,7 +869,7 @@ def _create_netcdf_file( # noqa: PLR0913 if any(tf["filtered"] for tf in time_filters.values()): dst_dataset.setncattr( "processing_note", - "Non-monotonic time values filtered from original, see comment in variables", + "Non-monotonic time values filtered from original, see variable comments", ) # Create dimensions - may need to adjust time dimension sizes @@ -910,16 +907,6 @@ def _get_required_dimensions( dims_needed.update(var.dimensions) return dims_needed - def _create_dimensions( - self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, dims_needed: set[str] - ): - """Create dimensions in the destination 
dataset.""" - for dim_name in dims_needed: - if dim_name in src_group.dimensions: - src_dim = src_group.dimensions[dim_name] - size = len(src_dim) if not src_dim.isunlimited() else None - dst_dataset.createDimension(dim_name, size) - def _get_coordinate_variables( self, src_group: netCDF4.Group, dims_needed: set[str], vars_to_extract: list[str] ) -> list[str]: @@ -930,28 +917,6 @@ def _get_coordinate_variables( coord_vars.append(dim_name) # noqa: PERF401 return coord_vars - def _copy_variable(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, var_name: str): - """Helper method to copy a variable from source to destination.""" - try: - src_var = src_group.variables[var_name] - - # Create variable in destination - dst_var = dst_dataset.createVariable( - var_name, - src_var.dtype, - src_var.dimensions, - ) - - # Copy data and attributes - dst_var[:] = src_var[:] - for attr_name in src_var.ncattrs(): - dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) - - self.logger.debug(" Copied variable: %s", var_name) - - except Exception as e: # noqa: BLE001 - self.logger.warning("Failed to copy variable %s: %s", var_name, e) - def global_metadata(self, log_file: str, group_name: str): """Use instance variables to return a dictionary of metadata specific for the data that are written From b06fafe5ac9b03c16fcd7dec605fe7c0cd963468 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 16:18:12 -0800 Subject: [PATCH 02/28] Use all fixed dimensions (no unlimited) and add attributes so that cf_xarray can be used in combine.py. 
--- src/data/nc42netcdfs.py | 98 ++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 97cc3955..5c4adc65 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -241,6 +241,15 @@ def _extract_root_group( self.logger.info("Extracting root group '/'") vars_to_extract = self._get_available_variables(src_dataset, root_parms) + # Add debugging output for root group processing + self.logger.info("=== ROOT GROUP DEBUG ===") + self.logger.info("Available variables: %s", sorted(vars_to_extract)) + self.logger.info("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) + self.logger.info( + "Available coordinate variables: %s", + [v for v in sorted(src_dataset.variables.keys()) if v in src_dataset.dimensions], + ) + if vars_to_extract: output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" self._create_netcdf_file( @@ -369,12 +378,14 @@ def _find_time_coordinates( "=================================== Group: %s =======================================", group_name, ) - for var_name in vars_to_extract: + # Sort variables to make processing deterministic + for var_name in sorted(vars_to_extract): if var_name in src_group.variables: var = src_group.variables[var_name] # Check each dimension to see if it's a time coordinate - for dim_name in var.dimensions: + # Sort dimensions to make processing deterministic + for dim_name in sorted(var.dimensions): if dim_name in src_group.variables: dim_var = src_group.variables[dim_name] @@ -660,7 +671,7 @@ def _plot_time_filtering(self, plot_data: dict): self.logger.info("Time filtering plot displayed for %s", plot_data["variable_name"]) - def _copy_variable_with_appropriate_time_filter( + def _copy_variable_with_appropriate_time_filter( # noqa: C901, PLR0912 self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, @@ -671,6 +682,18 @@ def _copy_variable_with_appropriate_time_filter( 
try: src_var = src_group.variables[var_name] + # Skip variables that use time dimensions with 0 points + for dim_name in src_var.dimensions: + if ( + dim_name in time_filters + and time_filters[dim_name]["filtered"] + and len(time_filters[dim_name]["indices"]) == 0 + ): + self.logger.debug( + "Skipping variable %s (uses dimension %s with 0 points)", var_name, dim_name + ) + return + # Create variable in destination dst_var = dst_dataset.createVariable( var_name, @@ -709,8 +732,17 @@ def _copy_variable_with_appropriate_time_filter( # Copy attributes for attr_name in src_var.ncattrs(): dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) - # override any coordinates attribute with just the time coordinate - dst_var.setncattr("coordinates", var_name + "_time") + if var_name in time_filters and time_filters[var_name]["filtered"]: + # Downstream process uses cf_xarray to recognize coordinates, add required attribute + dst_var.setncattr("standard_name", "time") + else: + # Override any coordinates attribute in src with just the time coordinate + dst_var.setncattr("coordinates", var_name + "_time") + # Downstream process uses cf_xarray to recognize coordinates, add required attribute + if var_name.startswith(("longitude", "latitude")): + dst_var.setncattr("units", "radians") + elif var_name.startswith("depth"): + dst_var.setncattr("units", "meters") self.logger.debug(" Copied variable: %s", var_name) @@ -761,65 +793,35 @@ def _create_dimensions_with_time_filters( time_filters: dict[str, dict], ): """Create dimensions in the destination dataset, adjusting time dimensions if filtered.""" - # NetCDF3 allows only one unlimited dimension - primary_time_dim = self._find_primary_time_dimension(src_group, dims_needed, time_filters) - unlimited_dim_created = False - + # Use fixed dimensions for all - simpler and avoids NetCDF3 unlimited dimension issues for dim_name in dims_needed: if dim_name not in src_group.dimensions: continue src_dim = src_group.dimensions[dim_name] - 
should_be_unlimited = dim_name == primary_time_dim and not unlimited_dim_created size = self._calculate_dimension_size( - dim_name, src_dim, time_filters, should_be_unlimited + dim_name, src_dim, time_filters, should_be_unlimited=False ) - # Track if we created the unlimited dimension - if size is None: - unlimited_dim_created = True + # Skip dimensions with 0 points to avoid NetCDF3 conflicts + if size == 0: + self.logger.debug("Skipping dimension %s with 0 points", dim_name) + continue dst_dataset.createDimension(dim_name, size) - def _find_primary_time_dimension( - self, src_group: netCDF4.Group, dims_needed: set[str], time_filters: dict[str, dict] - ) -> str | None: - """Find the primary time dimension that should be unlimited in NetCDF3.""" - for dim_name in dims_needed: - if dim_name in src_group.dimensions: - src_dim = src_group.dimensions[dim_name] - is_time_like = "time" in dim_name.lower() or dim_name in time_filters - if src_dim.isunlimited() and is_time_like: - return dim_name - - # Fallback: return first unlimited dimension found - for dim_name in dims_needed: - if dim_name in src_group.dimensions and src_group.dimensions[dim_name].isunlimited(): - return dim_name - - return None - def _calculate_dimension_size( self, dim_name: str, src_dim, time_filters: dict[str, dict], should_be_unlimited: bool, # noqa: FBT001 - ) -> int | None: - """Calculate the size for a dimension, handling NetCDF3 unlimited dimension constraint.""" + ) -> int: + """Calculate the size for a dimension - always returns fixed size for simplicity.""" is_filtered_time = dim_name in time_filters and time_filters[dim_name]["filtered"] if is_filtered_time: filtered_size = len(time_filters[dim_name]["indices"]) - if should_be_unlimited: - self.logger.debug( - "Created filtered unlimited time dimension %s: %s -> unlimited (%d points)", - dim_name, - len(src_dim), - filtered_size, - ) - return None # Unlimited - self.logger.debug( "Created filtered fixed time dimension %s: %s -> %s", 
dim_name, @@ -828,18 +830,16 @@ def _calculate_dimension_size( ) return filtered_size - # Non-filtered dimension - if should_be_unlimited: - self.logger.debug("Created unlimited dimension %s", dim_name) - return None - + # Non-filtered dimension - always fixed size size = len(src_dim) if src_dim.isunlimited(): self.logger.debug( - "Converting unlimited dimension %s to fixed size %s (NetCDF3 limitation)", + "Converting unlimited dimension %s to fixed size %s", dim_name, size, ) + else: + self.logger.debug("Created fixed dimension %s: %s", dim_name, size) return size def _create_netcdf_file( # noqa: PLR0913 From 8367bd41b019349c332dfc855af652bc73f2648b Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 16:34:03 -0800 Subject: [PATCH 03/28] Add nudged longitude and latitude variables to the combined_nc dataset. --- src/data/combine.py | 84 +++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index e29963cb..c186f68e 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -285,38 +285,6 @@ def _range_qc_combined_nc( # noqa: C901, PLR0912 self.combined_nc = self.combined_nc.drop_vars(qced_vars) self.logger.info("Done range checking %s", instrument) - def _nudge_pos(self, max_sec_diff_at_end=10): - """Apply linear nudges to underwater latitudes and longitudes so that - they match the surface gps positions. 
- """ - try: - lon = self.combined_nc["navigation_longitude"] - except KeyError: - error_message = "No navigation_longitude data in combined_nc" - raise EOFError(error_message) from None - lat = self.combined_nc["navigation_latitude"] - lon_fix = self.combined_nc["gps_longitude"] - lat_fix = self.combined_nc["gps_latitude"] - - # Use the shared function from AUV module - lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( - nav_longitude=lon, - nav_latitude=lat, - gps_longitude=lon_fix, - gps_latitude=lat_fix, - logger=self.logger, - auv_name=self.args.auv_name, - mission=self.args.mission, - max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=True, - ) - - # Store results in instance variables for compatibility - self.segment_count = segment_count - self.segment_minsum = segment_minsum - - return lon_nudged, lat_nudged - def _apply_plumbing_lag( self, sensor: str, @@ -551,6 +519,38 @@ def _geometric_depth_correction(self, sensor, orig_nc): return corrected_depth + def _nudge_pos(self, max_sec_diff_at_end=10): + """Apply linear nudges to underwater latitudes and longitudes so that + they match the surface gps positions. 
+ """ + try: + lon = self.combined_nc["universals_longitude"] + except KeyError: + error_message = "No universals_longitude data in combined_nc" + raise EOFError(error_message) from None + lat = self.combined_nc["universals_latitude"] + lon_fix = self.combined_nc["nal9602_longitude_fix"] + lat_fix = self.combined_nc["nal9602_latitude_fix"] + + # Use the shared function from AUV module + lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( + nav_longitude=lon, + nav_latitude=lat, + gps_longitude=lon_fix, + gps_latitude=lat_fix, + logger=self.logger, + auv_name="", + mission="", + max_sec_diff_at_end=max_sec_diff_at_end, + create_plots=True, + ) + + # Store results in instance variables for compatibility + self.segment_count = segment_count + self.segment_minsum = segment_minsum + + return lon_nudged, lat_nudged + def combine_groups(self): log_file = self.args.log_file src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) @@ -558,7 +558,6 @@ def combine_groups(self): self.combined_nc = xr.Dataset() for group_file in group_files: self.logger.info("Group file: %s", group_file.name) - # Make nudged_longitude, nudged_latitude = self._nudge_pos() call on when appropriate # Loop through each variable in the group file and add it to the combined_nc member list with xr.open_dataset(group_file) as ds: for orig_var in ds.variables: @@ -570,6 +569,23 @@ def combine_groups(self): self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) self.combined_nc[new_var] = ds[orig_var] + # Add nudged longitude and latitude variables to the combined_nc dataset + nudged_longitude, nudged_latitude = self._nudge_pos() + self.combined_nc["nudged_longitude"] = nudged_longitude + self.combined_nc["nudged_longitude"].attrs = { + "long_name": "Nudged Longitude", + "standard_name": "longitude", + "units": "degrees_east", + "comment": "Dead reckoned longitude nudged to GPS positions", + } + self.combined_nc["nudged_latitude"] = nudged_latitude + 
self.combined_nc["nudged_latitude"].attrs = { + "long_name": "Nudged Latitude", + "standard_name": "latitude", + "units": "degrees_north", + "comment": "Dead reckoned latitude nudged to GPS positions", + } + def write_netcdf(self) -> None: log_file = self.args.log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) @@ -613,7 +629,7 @@ def process_command_line(self): "--log_file", action="store", help=( - "Path to the log file for the mission, e.g.: " + "Path to the log file of original LRAUV data, e.g.: " "brizo/missionlogs/2025/20250903_20250909/" "20250905T072042/202509050720_202509051653.nc4" ), From fb8a446f1ded197d3af13b0e81d66fd42950d9a8 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 16:35:28 -0800 Subject: [PATCH 04/28] Use cf_xarray accessors by axis to avoid using actual names for the time coordinate. --- src/data/AUV.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index c8bef718..4d09ced6 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -11,6 +11,7 @@ import logging from datetime import datetime +import cf_xarray  # Needed for the .cf accessor  # noqa: F401 import numpy as np import xarray as xr @@ -88,22 +89,21 @@ def nudge_positions(  # noqa: C901, PLR0912, PLR0913, PLR0915 if lon[:][segi].any(): lon_nudged_array = lon[segi] lat_nudged_array = lat[segi] - dt_nudged = lon.get_index("navigation_time")[segi] + dt_nudged = lon.cf["T"][segi] logger.debug( "Filled _nudged arrays with %d values starting at %s " "which were before the first GPS fix at %s", len(segi), - lat.get_index("navigation_time")[0], - lat_fix.get_index("gps_time")[0], + lat.cf["T"].data[0], + lat_fix.cf["T"].data[0], ) else: lon_nudged_array = np.array([]) lat_nudged_array = np.array([]) dt_nudged = np.array([], dtype="datetime64[ns]") if segi.any(): - seg_min = ( - lat.get_index("navigation_time")[segi][-1] - lat.get_index("navigation_time")[segi][0] - ).total_seconds() / 60 + # Return 
difference of numpy timestamps in units of minutes + seg_min = (lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]).astype("timedelta64[m]") else: seg_min = 0 logger.info( From 18a631b2a388e55f5fd6da5651b76fc571363030 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 16:36:52 -0800 Subject: [PATCH 05/28] Add combine(). --- src/data/process.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/data/process.py b/src/data/process.py index 4dcedd38..fbb0428d 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -68,6 +68,7 @@ class data are: download_process and calibrate, while for LRAUV class data from align import Align_NetCDF, InvalidCalFile from archive import LOG_NAME, Archiver from calibrate import EXPECTED_SENSORS, Calibrate_NetCDF +from combine import Combine_NetCDF from create_products import CreateProducts from dorado_info import FAILED, TEST, dorado_info from emailer import NOTIFICATION_EMAIL, Emailer @@ -739,6 +740,8 @@ def extract(self, log_file: str) -> None: extract = Extract() extract.args = argparse.Namespace() extract.args.verbose = self.args.verbose + extract.args.log_file = self.args.log_file + extract.commandline = self.commandline extract.logger.setLevel(self._log_levels[self.args.verbose]) extract.logger.addHandler(self.log_handler) @@ -748,6 +751,23 @@ def extract(self, log_file: str) -> None: input_file = extract.download_with_pooch(url, output_dir) return extract.extract_groups_to_files_netcdf4(input_file) + def combine(self, log_file: str) -> None: + self.logger.info("Combining netCDF files for log file: %s", log_file) + self.logger.info( + "Equivalent to the calibrate step for Dorado class vehicles. " + "Adds nudge positions and more layers of quality control." 
+ ) + combine = Combine_NetCDF() + combine.args = argparse.Namespace() + combine.args.verbose = self.args.verbose + combine.args.log_file = self.args.log_file + combine.commandline = self.commandline + combine.logger.setLevel(self._log_levels[self.args.verbose]) + combine.logger.addHandler(self.log_handler) + + combine.combine_groups() + combine.write_netcdf() + @log_file_processor def process_log_file(self, log_file: str) -> None: netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) @@ -764,7 +784,8 @@ def process_log_file(self, log_file: str) -> None: self.logger.info("commandline = %s", self.commandline) netcdfs_dir = self.extract(log_file) - # self.align(log_file) + self.combine(log_file=log_file) + self.align(log_file=log_file) # self.resample(log_file) # self.create_products(log_file) self.logger.info("Finished processing log file: %s", log_file) From bab21345a8fe99930cabe0e0eced4ff4e5de461d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 7 Nov 2025 10:30:57 -0800 Subject: [PATCH 06/28] Add _analyze_original_time_coordinates() to log warnings for log_files like brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4. --- src/data/nc42netcdfs.py | 115 +++++++++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 32 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 5c4adc65..4ae1fbe8 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -303,38 +303,6 @@ def _get_available_variables( self.logger.debug(" Variables to extract: %s", vars_to_extract) return vars_to_extract - def _find_time_coordinate(self, src_group: netCDF4.Group) -> str: - """Find the time coordinate variable in a group using introspection. 
- - Returns: - str: Name of the time coordinate variable, or empty string if not found - """ - # Strategy 1: Look for variables with "time" in the name (most common) - time_vars = [var_name for var_name in src_group.variables if "time" in var_name.lower()] - if time_vars: - # Prefer variables that start with 'time' (like time_NAL9602) - time_vars.sort(key=lambda x: (not x.lower().startswith("time"), x)) - self.logger.debug("Found time coordinate %s via name pattern", time_vars[0]) - return time_vars[0] - - # Strategy 2: Look for variables with time-like units - for var_name, var in src_group.variables.items(): - if hasattr(var, "units"): - units = getattr(var, "units", "").lower() - time_patterns = ["seconds since", "days since", "hours since"] - if any(pattern in units for pattern in time_patterns): - self.logger.debug("Found time coordinate %s via units", var_name) - return var_name - - # Strategy 3: Look for unlimited dimension (backup) - for dim_name, dim in src_group.dimensions.items(): - if dim.isunlimited() and dim_name in src_group.variables: - self.logger.debug("Found time coordinate %s via unlimited dimension", dim_name) - return dim_name - - self.logger.debug("No time coordinate found in group") - return "" - def _get_time_filters_for_variables( self, log_file: str, group_name: str, src_group: netCDF4.Group, vars_to_extract: list[str] ) -> dict[str, dict]: @@ -351,6 +319,10 @@ def _get_time_filters_for_variables( # Find all time coordinates used by variables in extraction list time_coords_found = self._find_time_coordinates(group_name, src_group, vars_to_extract) + # Add diagnostic check to compare original time coordinate values + if len(time_coords_found) > 1: + self._analyze_original_time_coordinates(src_group, time_coords_found, group_name) + # Parse plot time settings once plot_group_name, plot_time_coord_name = self._parse_plot_time_argument() @@ -369,6 +341,85 @@ def _get_time_filters_for_variables( return time_filters + def 
_analyze_original_time_coordinates( + self, src_group: netCDF4.Group, time_coords_found: set[str], group_name: str + ): + """Quick diagnostic for Dead Reckoned timing issues in root group.""" + # Only analyze root group Dead Reckoned coordinates + if group_name != "/": + return + + if ( + "latitude_time" not in time_coords_found + or "longitude_time" not in time_coords_found + or "latitude_time" not in src_group.variables + or "longitude_time" not in src_group.variables + ): + return + + lat_time = src_group.variables["latitude_time"][:] + lon_time = src_group.variables["longitude_time"][:] + + # Quick check for Dead Reckoned timing synchronization + min_len = min(len(lat_time), len(lon_time)) + if min_len == 0: + return + + # Compare overlapping portion + overlap_equal = np.array_equal(lat_time[:min_len], lon_time[:min_len]) + + if overlap_equal and len(lat_time) == len(lon_time): + self.logger.info( + "Dead Reckoned timing: latitude_time and longitude_time are properly synchronized" + ) + return + + # Calculate timing differences for diagnosis + time_diff = lon_time[:min_len] - lat_time[:min_len] + non_zero_mask = time_diff != 0 + num_differences = np.sum(non_zero_mask) + percent_different = 100.0 * num_differences / min_len + + if len(lat_time) != len(lon_time): + self.logger.warning( + "Dead Reckoned timing: Different array lengths - " + "latitude_time: %d, longitude_time: %d", + len(lat_time), + len(lon_time), + ) + + if num_differences > 0: + diff_values = time_diff[non_zero_mask] + max_abs_diff = np.max(np.abs(diff_values)) + + # Define thresholds for Dead Reckoned timing issues + MAJOR_PERCENT_THRESHOLD = 50.0 # 50% different points + MAJOR_TIME_THRESHOLD = 3600.0 # 1 hour difference + MINOR_PERCENT_THRESHOLD = 5.0 # 5% different points + MINOR_TIME_THRESHOLD = 60.0 # 1 minute difference + + if percent_different > MAJOR_PERCENT_THRESHOLD or max_abs_diff > MAJOR_TIME_THRESHOLD: + self.logger.warning( + "Dead Reckoned timing: Significant synchronization 
issues detected - " "%.1f%% of coordinates have timing differences (max: %.1f seconds)", percent_different, max_abs_diff, ) elif percent_different > MINOR_PERCENT_THRESHOLD or max_abs_diff > MINOR_TIME_THRESHOLD: self.logger.warning( "Dead Reckoned timing: Minor synchronization issues detected - " "%.1f%% of coordinates have timing differences (max: %.1f seconds)", percent_different, max_abs_diff, ) else: self.logger.info( "Dead Reckoned timing: Small timing differences detected - " "%.1f%% of coordinates differ (max: %.1f seconds)", percent_different, max_abs_diff, ) def _find_time_coordinates( self, group_name: str, src_group: netCDF4.Group, vars_to_extract: list[str] ) -> set[str]: From 31c40305c58b963836451059e9adef68817a016d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 7 Nov 2025 11:24:27 -0800 Subject: [PATCH 07/28] Handle exceptions more explicitly, improve warning messages. --- src/data/nc42netcdfs.py | 172 ++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 4ae1fbe8..9f0e54f1 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -237,30 +237,26 @@ def _extract_root_group( if not root_parms: return - try: - self.logger.info("Extracting root group '/'") - vars_to_extract = self._get_available_variables(src_dataset, root_parms) + self.logger.info("Extracting root group '/'") + vars_to_extract, _ = self._get_available_variables(src_dataset, root_parms) + + # Add debugging output for root group processing + self.logger.info("=== ROOT GROUP DEBUG ===") + self.logger.info("Available variables: %s", sorted(vars_to_extract)) + self.logger.info("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) + self.logger.info( + "Available coordinate variables: %s", + [v for v in sorted(src_dataset.variables.keys()) if v in src_dataset.dimensions], + ) - # Add debugging output for root group 
processing - self.logger.info("=== ROOT GROUP DEBUG ===") - self.logger.info("Available variables: %s", sorted(vars_to_extract)) - self.logger.info("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) - self.logger.info( - "Available coordinate variables: %s", - [v for v in sorted(src_dataset.variables.keys()) if v in src_dataset.dimensions], + if vars_to_extract: + output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" + self._create_netcdf_file( + log_file, group_name, src_dataset, vars_to_extract, output_file ) - - if vars_to_extract: - output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" - self._create_netcdf_file( - log_file, group_name, src_dataset, vars_to_extract, output_file - ) - self.logger.info("Extracted root group '/' to %s", output_file) - else: - self.logger.warning("No requested variables found in root group '/'") - - except Exception as e: # noqa: BLE001 - self.logger.warning("Could not extract root group '/': %s", e) + self.logger.info("Extracted root group '/' to %s", output_file) + else: + self.logger.warning("No requested variables found in root group '/'") def _extract_single_group( self, @@ -276,7 +272,7 @@ def _extract_single_group( self.logger.debug(" Group %s", group_name) src_group = src_dataset.groups[group_name] - vars_to_extract = self._get_available_variables(src_group, group_parms) + vars_to_extract, requested_vars = self._get_available_variables(src_group, group_parms) if vars_to_extract: output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_{group_name}.nc" @@ -285,12 +281,12 @@ def _extract_single_group( ) self.logger.info("Extracted %s to %s", group_name, output_file) else: - self.logger.warning("No requested variables found in group %s", group_name) + self.logger.warning( + "No requested variables (%s) found in group %s", requested_vars, group_name + ) except KeyError: self.logger.warning("Group %s not found", group_name) - # except Exception as e: # noqa: BLE001 - # 
self.logger.warning("Could not extract %s: %s", group_name, e) def _get_available_variables( self, src_group: netCDF4.Group, group_parms: list[dict[str, Any]] @@ -301,7 +297,7 @@ def _get_available_variables( vars_to_extract = [var for var in requested_vars if var in available_vars] self.logger.debug(" Variables to extract: %s", vars_to_extract) - return vars_to_extract + return vars_to_extract, requested_vars def _get_time_filters_for_variables( self, log_file: str, group_name: str, src_group: netCDF4.Group, vars_to_extract: list[str] @@ -730,75 +726,79 @@ def _copy_variable_with_appropriate_time_filter( # noqa: C901, PLR0912 time_filters: dict[str, dict], ): """Copy a variable with appropriate time filtering applied.""" - try: - src_var = src_group.variables[var_name] - - # Skip variables that use time dimensions with 0 points - for dim_name in src_var.dimensions: - if ( - dim_name in time_filters - and time_filters[dim_name]["filtered"] - and len(time_filters[dim_name]["indices"]) == 0 - ): - self.logger.debug( - "Skipping variable %s (uses dimension %s with 0 points)", var_name, dim_name - ) - return + src_var = src_group.variables[var_name] + + # Skip variables that use time dimensions with 0 points + for dim_name in src_var.dimensions: + if ( + dim_name in time_filters + and time_filters[dim_name]["filtered"] + and len(time_filters[dim_name]["indices"]) == 0 + ): + self.logger.debug( + "Skipping variable %s (uses dimension %s with 0 points)", var_name, dim_name + ) + return - # Create variable in destination + # Create variable in destination + try: dst_var = dst_dataset.createVariable( var_name, src_var.dtype, src_var.dimensions, + zlib=True, + complevel=4, ) + except ValueError as e: + self.logger.warning( + "Could not create variable %s in destination dataset: %s. 
", + var_name, + str(e), + ) + return - # Check if this variable itself is a time coordinate that needs filtering - if var_name in time_filters and time_filters[var_name]["filtered"]: - # This is a time coordinate variable that needs filtering - time_indices = time_filters[var_name]["indices"] - dst_var[:] = src_var[:][time_indices] - dst_var.setncattr("comment", time_filters[var_name]["comment"]) - self.logger.debug("Applied time filtering to time coordinate %s", var_name) - - # Check if this variable depends on any filtered time dimensions - elif src_var.dimensions: - # Find which (if any) of this variable's dimensions are filtered time coordinates - filtered_dims = {} - for dim_name in src_var.dimensions: - if dim_name in time_filters and time_filters[dim_name]["filtered"]: - filtered_dims[dim_name] = time_filters[dim_name]["indices"] - - if filtered_dims: - # Apply filtering for the appropriate dimensions - self._apply_multidimensional_time_filter( - src_var, dst_var, var_name, filtered_dims - ) - else: - # No time filtering needed - dst_var[:] = src_var[:] + # Check if this variable itself is a time coordinate that needs filtering + if var_name in time_filters and time_filters[var_name]["filtered"]: + # This is a time coordinate variable that needs filtering + time_indices = time_filters[var_name]["indices"] + dst_var[:] = src_var[:][time_indices] + dst_var.setncattr("comment", time_filters[var_name]["comment"]) + self.logger.debug("Applied time filtering to time coordinate %s", var_name) + + # Check if this variable depends on any filtered time dimensions + elif src_var.dimensions: + # Find which (if any) of this variable's dimensions are filtered time coordinates + filtered_dims = {} + for dim_name in src_var.dimensions: + if dim_name in time_filters and time_filters[dim_name]["filtered"]: + filtered_dims[dim_name] = time_filters[dim_name]["indices"] + + if filtered_dims: + # Apply filtering for the appropriate dimensions + 
self._apply_multidimensional_time_filter(src_var, dst_var, var_name, filtered_dims) else: - # Scalar variable or no dimensions + # No time filtering needed dst_var[:] = src_var[:] + else: + # Scalar variable or no dimensions + dst_var[:] = src_var[:] - # Copy attributes - for attr_name in src_var.ncattrs(): - dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) - if var_name in time_filters and time_filters[var_name]["filtered"]: - # Downstream process uses cf_xarray to recognize coordinates, add required attribute - dst_var.setncattr("standard_name", "time") - else: - # Override any coordinates attribute in src with just the time coordinate - dst_var.setncattr("coordinates", var_name + "_time") - # Downstream process uses cf_xarray to recognize coordinates, add required attribute - if var_name.startswith(("longitude", "latitude")): - dst_var.setncattr("units", "radians") - elif var_name.startswith("depth"): - dst_var.setncattr("units", "meters") - - self.logger.debug(" Copied variable: %s", var_name) - - except Exception as e: # noqa: BLE001 - self.logger.warning("Failed to copy variable %s: %s", var_name, e) + # Copy attributes + for attr_name in src_var.ncattrs(): + dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + if var_name in time_filters and time_filters[var_name]["filtered"]: + # Downstream process uses cf_xarray to recognize coordinates, add required attribute + dst_var.setncattr("standard_name", "time") + else: + # Override any coordinates attribute in src with just the time coordinate + dst_var.setncattr("coordinates", var_name + "_time") + # Downstream process uses cf_xarray to recognize coordinates, add required attribute + if var_name.startswith(("longitude", "latitude")): + dst_var.setncattr("units", "radians") + elif var_name.startswith("depth"): + dst_var.setncattr("units", "meters") + + self.logger.debug(" Copied variable: %s", var_name) def _apply_multidimensional_time_filter( self, src_var, dst_var, var_name: str, 
filtered_dims: dict[str, list[int]] From 40d0610860b6ff02ccf3e58d24659246b19cb55d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:10:20 -0800 Subject: [PATCH 08/28] Add --log_file option and generalize for doroado or lrauv processing. --- src/data/align.py | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 9e07d43e..f8603424 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -34,6 +34,7 @@ TIME60HZ, AUV_NetCDF, ) +from nc42netcdfs import BASE_LRAUV_PATH from scipy.interpolate import interp1d @@ -127,16 +128,20 @@ def global_metadata(self): return metadata - def process_cal(self, vehicle: str = "", name: str = "") -> None: # noqa: C901, PLR0912, PLR0915 + def process_cal(self, vehicle: str = "", name: str = "", log_file: str = "") -> None: # noqa: C901, PLR0912, PLR0915 name = name or self.args.mission vehicle = vehicle or self.args.auv_name - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) - in_fn = f"{vehicle}_{name}_cal.nc" - try: - self.calibrated_nc = xr.open_dataset(Path(netcdfs_dir, in_fn)) - except ValueError as e: - raise InvalidCalFile(e) from e - self.logger.info("Processing %s from %s", in_fn, netcdfs_dir) + if name and vehicle: + netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) + src_file = Path(netcdfs_dir, f"{vehicle}_{name}_cal.nc") + elif log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(log_file).parent}") + src_file = Path(netcdfs_dir, f"{Path(log_file).stem}_cal.nc") + else: + msg = "Must provide either mission and vehicle or log_file" + raise ValueError(msg) + self.calibrated_nc = xr.open_dataset(src_file) + self.logger.info("Processing %s", src_file) self.aligned_nc = xr.Dataset() self.min_time = datetime.now(UTC) self.max_time = datetime(1970, 1, 1, tzinfo=UTC) @@ -178,7 +183,7 @@ def process_cal(self, vehicle: str = "", name: str = "") -> None: # noqa: C901, 
bounds_error=False, ) except KeyError: - error_message = f"No nudged_latitude data in {in_fn}" + error_message = f"No nudged_latitude data in {src_file}" raise InvalidCalFile(error_message) from None lon_interp = interp1d( self.calibrated_nc["nudged_longitude"].get_index("time").view(np.int64).tolist(), @@ -278,7 +283,7 @@ def process_cal(self, vehicle: str = "", name: str = "") -> None: # noqa: C901, ) self.aligned_nc[f"{instr}_latitude"].attrs = self.calibrated_nc["nudged_latitude"].attrs self.aligned_nc[f"{instr}_latitude"].attrs["comment"] += ( - f". Variable nudged_latitude from {in_fn} file linearly" + f". Variable nudged_latitude from {src_file} file linearly" f" interpolated onto {variable.split('_')[0]} time values." ) self.aligned_nc[f"{instr}_latitude"].attrs["long_name"] = "Latitude" @@ -294,7 +299,7 @@ def process_cal(self, vehicle: str = "", name: str = "") -> None: # noqa: C901, "nudged_longitude" ].attrs self.aligned_nc[f"{instr}_longitude"].attrs["comment"] += ( - f". Variable nudged_longitude from {in_fn} file linearly" + f". Variable nudged_longitude from {src_file} file linearly" f" interpolated onto {variable.split('_')[0]} time values." 
) self.aligned_nc[f"{instr}_longitude"].attrs["long_name"] = "Longitude" @@ -373,6 +378,15 @@ def process_command_line(self): action="store", help="Mission directory, e.g.: 2020.064.10", ) + parser.add_argument( + "--log_file", + action="store", + help=( + "Path to the log file of original LRAUV data, e.g.: " + "brizo/missionlogs/2025/20250903_20250909/" + "20250905T072042/202509050720_202509051653.nc4" + ), + ) parser.add_argument( "--plot", action="store_true", @@ -401,6 +415,10 @@ def process_command_line(self): align_netcdf = Align_NetCDF() align_netcdf.process_command_line() p_start = time.time() - netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_netcdf(netcdf_dir) + if align_netcdf.args.auv_name and align_netcdf.args.mission: + netcdf_dir = align_netcdf.process_cal() + align_netcdf.write_netcdf(netcdf_dir) + elif align_netcdf.args.log_file: + netcdf_dir = align_netcdf.process_cal(log_file=align_netcdf.args.log_file) + align_netcdf.write_netcdf(netcdf_dir) align_netcdf.logger.info("Time to process: %.2f seconds", (time.time() - p_start)) From 060f097e0bb595bf4e8e042b22e8907ea1be9b24 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:11:45 -0800 Subject: [PATCH 09/28] Get seg_min as float(seconds) / 60.0. 
--- src/data/AUV.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index 4d09ced6..8dd913e1 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -103,7 +103,9 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 dt_nudged = np.array([], dtype="datetime64[ns]") if segi.any(): # Return difference of numpy timestamps in units of minutes - seg_min = (lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]).astype("timedelta64[m]") + seg_min = (lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]).astype( + "timedelta64[s]" + ).astype(float) / 60.0 else: seg_min = 0 logger.info( From 8bcfeb169b647e9cb4fda42cfb39b8b07b26f61b Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:47:07 -0800 Subject: [PATCH 10/28] Update docstring, remove unused methods, convert lon & lat from radians to degrees. --- src/data/combine.py | 257 ++++++++------------------------------------ 1 file changed, 42 insertions(+), 215 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index c186f68e..e5bb19a4 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -1,18 +1,21 @@ #!/usr/bin/env python """ -Combine original LRAUV data from separate .nc files and produce a single NetCDF -file that also contains corrected (nudged) latitudes and longitudes. +Combine original LRAUV data from separate *_Group_*.nc files and produce a +single NetCDF file that also contains corrected (nudged) latitudes and +longitudes. Read original data from netCDF files created by nc42netcdfs.py and write out a single netCDF file with the important variables at original sampling intervals. -Geometric alignment and any plumbing lag corrections are also done during this -step. This script is similar to calibrate.py that is used for Dorado and i2map -data, but does not apply any sensor calibrations as those are done on the LRAUV -vehicles before the data is logged and unserialized to NetCDF-4 files. 
The QC -methods implemented in calibrate.py will be reused here. +Any geometric alignment and any plumbing lag corrections can also be done during +this step. This script is similar to calibrate.py that is used for Dorado and +i2map data, but does not apply any sensor calibrations as those are done on the +LRAUV vehicles before the data is logged and unserialized to NetCDF4 files. The +QC methods implemented in calibrate.py may also be reused here. The calbrate.py +code is wrapped around the concept of "sensor" which has an anaolog in this code +of "group", but is too different to easily reuse. The file will contain combined variables (the combined_nc member variable) and -be analogous to the original NetCDF-4. Rather than using groups in NetCDF-4 the +be analogous to the original NetCDF4. Rather than using groups in NetCDF4 the data will be written in classic NetCDF-CF with a naming convention that is similar to Dorado data, with group names (any underscores removed) preceeding the variable name with an underscore - all lower case characters: @@ -25,8 +28,10 @@ _latitude _longitude ``` -The file will be named with a "_cal.nc" suffix to be consistent with the Dorado -and i2map files, indicating the stage of processing. +The file will be named with a "_combined.nc" suffix. It is analogous to the +"_cal.nc" suffix used for Dorado and i2map files and will provide a clear +indication of the stage of processing. The data are suiable for input to the +align.py script. 
""" @@ -43,10 +48,8 @@ from socket import gethostname from typing import NamedTuple import cf_xarray # Needed for the .cf accessor # noqa: F401 -import matplotlib.pyplot as plt import numpy as np import xarray as xr -from scipy.interpolate import interp1d import pandas as pd from AUV import monotonic_increasing_time_indices, nudge_positions @@ -61,11 +64,8 @@ class Range(NamedTuple): max: float -# Using lower case vehicle names, modify in _define_sensor_info() for changes -# over time Used to reduce ERROR & WARNING log messages for expected missing -# sensor data. There are core data common to most all vehicles, whose groups -# are listed in BASE_GROUPS. EXPECTED_GROUPS contains additional groups for -# specific vehicles. +# There are core data common to most all vehicles, whose groups are listed in +# BASE_GROUPS. EXPECTED_GROUPS contains additional groups for specific vehicles. BASE_GROUPS = { "lrauv": [ "CTDSeabird", @@ -74,75 +74,13 @@ class Range(NamedTuple): } EXPECTED_GROUPS = { - "dorado": [ - "navigation", - "gps", - "depth", - "ecopuck", - "hs2", - "ctd1", - "ctd2", - "isus", - "biolume", - "lopc", - "tailcone", - ], - "i2map": [ - "navigation", - "gps", - "depth", - "seabird25p", - "transmissometer", - "tailcone", + "pontus": [ + "WetLabsUBAT", ], } -# Used in test fixture in conftetst.py -EXPECTED_GROUPS["Dorado389"] = EXPECTED_GROUPS["dorado"] - - -def align_geom(sensor_offset, pitches): - """Use x & y sensor_offset values in meters from sensor_info and - pitch in degrees to compute and return actual depths of the sensor - based on the geometry relative to the vehicle's depth sensor. 
- """ - # See https://en.wikipedia.org/wiki/Rotation_matrix - # - # * instrument location with pitch applied - # / | - # / | - # / | - # / | - # / | - # / | - # / | - # / | - # / | - # / - # / - # / y - # / _ - # / o - # / f - # / f - # / * instrument location - # / | - # / \ | | - # / \ | y - # / pitch (theta) | | - # / \ | | - # --------------------x------------------+ --> nose - # - # [ cos(pitch) -sin(pitch) ] [x] [x'] - # X = - # [ sin(pitch) cos(pitch) ] [y] [y'] - offsets = [] - for pitch in pitches: - theta = pitch * np.pi / 180.0 - R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) - x_off, y_off = np.matmul(R, sensor_offset) - offsets.append(y_off) - - return offsets +# Combine the BASE_GROUPS into each EXPECTED_GROUPS entry +for vehicle, groups in EXPECTED_GROUPS.items(): + EXPECTED_GROUPS[vehicle] = groups + BASE_GROUPS["lrauv"] class Combine_NetCDF: @@ -285,31 +223,6 @@ def _range_qc_combined_nc( # noqa: C901, PLR0912 self.combined_nc = self.combined_nc.drop_vars(qced_vars) self.logger.info("Done range checking %s", instrument) - def _apply_plumbing_lag( - self, - sensor: str, - time_index: pd.DatetimeIndex, - time_name: str, - ) -> tuple[xr.DataArray, str]: - """ - Apply plumbing lag to a time index in the combined netCDF file. 
- """ - # Convert lag_secs to milliseconds as np.timedelta64 neeeds an integer - lagged_time = time_index - np.timedelta64( - int(self.sinfo[sensor]["lag_secs"] * 1000), - "ms", - ) - # Need to update the sensor's time coordinate in the combined netCDF file - # so that DataArrays created with lagged_time fit onto the coordinate - self.combined_nc.coords[f"{sensor}_{time_name}"] = xr.DataArray( - lagged_time, - coords=[lagged_time], - dims={f"{sensor}_{time_name}"}, - name=f"{sensor}_{time_name}", - ) - lag_info = f"with plumbing lag correction of {self.sinfo[sensor]['lag_secs']} seconds" - return lagged_time, lag_info - def _biolume_process(self, sensor): try: orig_nc = getattr(self, sensor).orig_data @@ -418,117 +331,16 @@ def _biolume_process(self, sensor): set_to_nan=True, ) - def _geometric_depth_correction(self, sensor, orig_nc): - """Performs the align_geom() function from the legacy Matlab. - Works for any sensor, but requires navigation being processed first - as its variables in combined_nc are required. Returns corrected depth - array. - """ - # Fix pitch values to first and last points for interpolation to time - # values outside the range of the pitch values. - # See https://stackoverflow.com/a/45446546 - # and https://github.com/scipy/scipy/issues/12707#issuecomment-672794335 - try: - p_interp = interp1d( - self.combined_nc["navigation_time"].to_numpy().tolist(), - self.combined_nc["navigation_pitch"].to_numpy(), - fill_value=( - self.combined_nc["navigation_pitch"].to_numpy()[0], - self.combined_nc["navigation_pitch"].to_numpy()[-1], - ), - bounds_error=False, - ) - except KeyError: - error_message = "No navigation_time or navigation_pitch in combined_nc." 
- raise EOFError(error_message) from None - pitch = p_interp(orig_nc["time"].to_numpy().tolist()) - - d_interp = interp1d( - self.combined_nc["depth_time"].to_numpy().tolist(), - self.combined_nc["depth_filtdepth"].to_numpy(), - fill_value=( - self.combined_nc["depth_filtdepth"].to_numpy()[0], - self.combined_nc["depth_filtdepth"].to_numpy()[-1], - ), - bounds_error=False, - ) - orig_depth = d_interp(orig_nc["time"].to_numpy().tolist()) - offs_depth = align_geom(self.sinfo[sensor]["sensor_offset"], pitch) - - corrected_depth = xr.DataArray( - (orig_depth - offs_depth).astype(np.float64).tolist(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_depth", - ) - # 2008.289.03 has self.combined_nc["depth_time"][-1] (2008-10-16T15:42:32) - # at lot less than orig_nc["time"][-1] (2008-10-16T16:24:43) - # which, with "extrapolate" causes wildly incorrect depths to -359 m - # There may be other cases where this happens, in which case we'd like - # a general solution. For now, we'll just correct this mission. - d_beg_time_diff = ( - orig_nc["time"].to_numpy()[0] - self.combined_nc["depth_time"].to_numpy()[0] - ) - d_end_time_diff = ( - orig_nc["time"].to_numpy()[-1] - self.combined_nc["depth_time"].to_numpy()[-1] - ) - self.logger.info( - "%s: d_beg_time_diff: %s, d_end_time_diff: %s", - sensor, - d_beg_time_diff.astype("timedelta64[s]"), - d_end_time_diff.astype("timedelta64[s]"), - ) - if self.args.mission in ( - "2008.289.03", - "2010.259.01", - "2010.259.02", - ): - # This could be a more general check for all missions, but let's restrict it - # to known problematic missions for now. The above info message can help - # determine if this is needed for other missions. 
- self.logger.info( - "%s: Special QC for mission %s: Setting corrected_depth to NaN for times after %s", - sensor, - self.args.mission, - self.combined_nc["depth_time"][-1].to_numpy(), - ) - corrected_depth[ - np.where( - orig_nc.get_index("time") > self.combined_nc["depth_time"].to_numpy()[-1], - ) - ] = np.nan - if self.args.plot: - plt.figure(figsize=(18, 6)) - plt.plot( - orig_nc["time"].to_numpy(), - orig_depth, - "-", - orig_nc["time"].to_numpy(), - corrected_depth, - "--", - orig_nc["time"].to_numpy(), - pitch, - ".", - ) - plt.ylabel("Depth (m) & Pitch (deg)") - plt.legend(("Original depth", "Pitch corrected depth", "Pitch")) - plt.title( - f"Original and pitch corrected depth for {self.args.auv_name} {self.args.mission}", - ) - plt.show() - - return corrected_depth - def _nudge_pos(self, max_sec_diff_at_end=10): """Apply linear nudges to underwater latitudes and longitudes so that they match the surface gps positions. """ try: - lon = self.combined_nc["universals_longitude"] + lon = self.combined_nc["universals_longitude"] * 180.0 / np.pi except KeyError: error_message = "No universals_longitude data in combined_nc" raise EOFError(error_message) from None - lat = self.combined_nc["universals_latitude"] + lat = self.combined_nc["universals_latitude"] * 180.0 / np.pi lon_fix = self.combined_nc["nal9602_longitude_fix"] lat_fix = self.combined_nc["nal9602_latitude_fix"] @@ -555,6 +367,7 @@ def combine_groups(self): log_file = self.args.log_file src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) + self.summary_fields = set() self.combined_nc = xr.Dataset() for group_file in group_files: self.logger.info("Group file: %s", group_file.name) @@ -567,10 +380,23 @@ def combine_groups(self): new_group = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() new_var = new_group + "_" + orig_var.lower() self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) - 
self.combined_nc[new_var] = ds[orig_var] + if ( + orig_var in ("latitude", "longitude") + and ds[orig_var].attrs.get("units") == "radians" + ): + # Convert radians to degrees + self.combined_nc[new_var] = ds[orig_var] * 180.0 / np.pi + self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() + self.combined_nc[new_var].attrs["units"] = "degrees" + else: + self.combined_nc[new_var] = ds[orig_var] # Add nudged longitude and latitude variables to the combined_nc dataset - nudged_longitude, nudged_latitude = self._nudge_pos() + try: + nudged_longitude, nudged_latitude = self._nudge_pos() + except ValueError as e: + self.logger.error("Nudging positions failed: %s", e) # noqa: TRY400 + return self.combined_nc["nudged_longitude"] = nudged_longitude self.combined_nc["nudged_longitude"].attrs = { "long_name": "Nudged Longitude", @@ -589,7 +415,7 @@ def combine_groups(self): def write_netcdf(self) -> None: log_file = self.args.log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_cal.nc") + out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") self.combined_nc.attrs = self.global_metadata() self.logger.info("Writing combined group data to %s", out_fn) @@ -600,6 +426,7 @@ def write_netcdf(self) -> None: "Data variables written: %s", ", ".join(sorted(self.combined_nc.variables)), ) + self.logger.info("Wrote combined (_combined.nc) netCDF file: %s", out_fn) return netcdfs_dir From 3c7ccdebd910a33c4e8c1ecde276b8e2f09ae0d5 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:48:59 -0800 Subject: [PATCH 11/28] Have combine.py write a *_combined.nc file. 
--- LRAUV_WORKFLOW.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md index 98307671..4fcfc26f 100644 --- a/LRAUV_WORKFLOW.md +++ b/LRAUV_WORKFLOW.md @@ -13,24 +13,26 @@ on the local file system's work directory is as follows: │ │ ├── <- e.g.: ahi, brizo, pontus, tethys, ... │ │ │ ├── missionlogs/year/dlist_dir │ │ │ │ ├── <- e.g.: ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4 - │ │ │ │ │ ├── <- .nc4 file containing original data + │ │ │ │ │ ├── <- .nc4 file containing original data - created by unserialize │ │ │ │ │ ├── <- .nc files, one for each group from the .nc4 file - | | | | | | data identical to original in NETCDF4 format - │ │ │ │ │ ├── <_cal> <- A single NETCDF3 .nc file containing all the - | | | | | | varibles from the .nc files along with nudged - | | | | | | latitudes and longitudes - created by combine.py + | | | | | | data identical to original in NetCDF4 format, + | | | | | | but in more interoperable NetCDF3 format + | | | | | | - created by nc42netcdfs.py + │ │ │ │ │ ├── <_combined> <- A single NetCDF3 .nc file containing all the + | | | | | | varibles from the .nc files along with nudged + | | | | | | latitudes and longitudes - created by combine.py │ │ │ │ │ ├── <_align> <- .nc file with all measurement variables | | | | | | having associated coordinate variables - | | | | | | at original instrument sampling rate - - | | | | | | created by align.py + | | | | | | at original instrument sampling rate + | | | | | | - created by align.py │ │ │ │ │ ├── <_nS> <- .nc file with all measurement variables resampled to a common time grid at n Second intervals - created by resample.py nc42netcdfs.py Extract the groups and the variables we want from the groups into - individual .nc files. These data are saved using NETCDF4 format as - there are many unlimited dimensions that are not allowed in NETCDF3. + individual .nc files. 
These data are saved using NetCDF4 format as + there are many unlimited dimensions that are not allowed in NetCDF3. The data in the .nc files are identical to what is in the .nc4 groups. combine.py From e18d5de98c4b4379ce8623dd7774cb2cfc830471 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:49:32 -0800 Subject: [PATCH 12/28] Start testing align.py for lrauv log_files. --- .vscode/launch.json | 13 +++++++++---- src/data/process.py | 7 +++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 4c8e7641..59e53006 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -54,9 +54,12 @@ "console": "integratedTerminal", // A small log_file that has a reasonable amount of data, and known_hash to verify download //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", "--known_hash", "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"] - // Has bad latitude and longitude values and lots of bad Universal latitude_time values + // brizo 20250914T080941 has bad latitude and longitude values and lots of bad Universal latitude_time and longitude_time values //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"] + // brizo 20250916T230652 has several ESP Samples from stoqs_lrauv_sep2025 + "args": ["-v", "2", "--log_file", 
"brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"] }, { "name": "2.0 - calibrate.py", @@ -112,7 +115,8 @@ "program": "${workspaceFolder}/src/data/combine.py", "console": "integratedTerminal", "justMyCode": false, - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] }, { "name": "3.0 - align.py", @@ -321,7 +325,8 @@ "console": "integratedTerminal", //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] - "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] }, diff --git a/src/data/process.py b/src/data/process.py index fbb0428d..6856f3db 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -300,7 +300,7 @@ def calibrate(self, mission: str) -> None: cal_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 cal_netcdf.logger.removeHandler(self.log_handler) - def align(self, mission: str) -> None: + def align(self, mission: str = "", log_file: str = "") -> None: 
self.logger.info("Alignment steps for %s", mission) align_netcdf = Align_NetCDF() align_netcdf.args = argparse.Namespace() @@ -313,7 +313,10 @@ def align(self, mission: str) -> None: align_netcdf.logger.addHandler(self.log_handler) align_netcdf.commandline = self.commandline try: - netcdf_dir = align_netcdf.process_cal() + if log_file: + netcdf_dir = align_netcdf.process_cal(log_file=log_file) + else: + netcdf_dir = align_netcdf.process_cal() align_netcdf.write_netcdf(netcdf_dir) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 From 475cbec99a9bb9eeefd1a683e3bd970f36460535 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 12:30:57 -0800 Subject: [PATCH 13/28] Add additional diagnostic mesages for severe dead reckoned time sync problems. --- src/data/nc42netcdfs.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 9f0e54f1..fe3f286e 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -401,6 +401,24 @@ def _analyze_original_time_coordinates( percent_different, max_abs_diff, ) + self.logger.warning( + "Dead Reckoned timing: Differences begin at index %d", + np.where(non_zero_mask)[0][0], + ) + lon_subset = lon_time[ + max(0, np.where(non_zero_mask)[0][0] - 5) : np.where(non_zero_mask)[0][0] + 5 + ] + lat_subset = lat_time[ + max(0, np.where(non_zero_mask)[0][0] - 5) : np.where(non_zero_mask)[0][0] + 5 + ] + self.logger.warning( + "Dead Reckoned timing: longitude_time around this index: %s", + " ".join(f"{val:14.2f}" for val in lon_subset), + ) + self.logger.warning( + "Dead Reckoned timing: latitude_time around this index: %s", + " ".join(f"{val:14.2f}" for val in lat_subset), + ) elif percent_different > MINOR_PERCENT_THRESHOLD or max_abs_diff > MINOR_TIME_THRESHOLD: self.logger.warning( "Dead Reckoned timing: Minor synchronization issues detected - " From 455ccc5c0c10222c251721c5e29992a919a705e4 Mon Sep 
17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 19:05:00 -0800 Subject: [PATCH 14/28] Read in Group files with decode_cf=False, use xr.DataArray() to construct new variables. --- src/data/combine.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index e5bb19a4..0d72af4e 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -107,13 +107,13 @@ def global_metadata(self): metadata["featureType"] = "trajectory" try: metadata["time_coverage_start"] = str( - self.combined_nc["depth_time"].to_pandas().iloc[0].isoformat(), + pd.to_datetime(self.combined_nc["depth_time"].values, unit="s")[0].isoformat(), ) except KeyError: error_message = "No depth_time variable in combined_nc" raise EOFError(error_message) from None metadata["time_coverage_end"] = str( - self.combined_nc["depth_time"].to_pandas().iloc[-1].isoformat(), + pd.to_datetime(self.combined_nc["depth_time"].values, unit="s")[-1].isoformat(), ) metadata["distribution_statement"] = "Any use requires prior approval from MBARI" metadata["license"] = metadata["distribution_statement"] @@ -371,25 +371,36 @@ def combine_groups(self): self.combined_nc = xr.Dataset() for group_file in group_files: self.logger.info("Group file: %s", group_file.name) - # Loop through each variable in the group file and add it to the combined_nc member list - with xr.open_dataset(group_file) as ds: + with xr.open_dataset(group_file, decode_cf=False) as ds: + # New group name is loawercase with underscores removed + group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() + for orig_var in ds.variables: if orig_var.lower().endswith("time"): - self.logger.debug("Skipping time variable: %s", orig_var) + self.logger.info("Skipping time variable: %s", orig_var) continue - new_group = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() - new_var = new_group + "_" + orig_var.lower() + new_var = group_name + "_" + 
orig_var.lower() self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) if ( orig_var in ("latitude", "longitude") and ds[orig_var].attrs.get("units") == "radians" ): # Convert radians to degrees - self.combined_nc[new_var] = ds[orig_var] * 180.0 / np.pi + self.combined_nc[new_var] = xr.DataArray( + ds[orig_var].to_numpy() * 180.0 / np.pi, + coords=ds[orig_var].coords, + dims=ds[orig_var].dims, + ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() self.combined_nc[new_var].attrs["units"] = "degrees" + else: - self.combined_nc[new_var] = ds[orig_var] + self.combined_nc[new_var] = xr.DataArray( + ds[orig_var].to_numpy(), + coords=ds[orig_var].coords, + dims=ds[orig_var].dims, + ) + self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() # Add nudged longitude and latitude variables to the combined_nc dataset try: From b31fb8d675b5c934a8894e84d69c8e9bfbef6f33 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 11 Nov 2025 10:17:51 -0800 Subject: [PATCH 15/28] Add _consolidate_group_time_coords() and set dims and coords from its analysis. --- src/data/combine.py | 119 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 7 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 0d72af4e..f8c109e8 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -363,6 +363,100 @@ def _nudge_pos(self, max_sec_diff_at_end=10): return lon_nudged, lat_nudged + def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict: + """Analyze and consolidate time coordinates for a group. 
+ + Returns: + dict: Contains consolidated time info with keys: + - consolidated_time_name: name of consolidated coordinate (or None) + - consolidated_time_data: the time coordinate data (or None) + - time_coord_mapping: dict mapping original dims to consolidated dims + """ + # Find all time variables in this group + time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("time")} + + if not time_vars: + return { + "consolidated_time_name": None, + "consolidated_time_data": None, + "time_coord_mapping": {}, + } + + if len(time_vars) == 1: + # Single time coordinate - use it as consolidated + time_name = list(time_vars.keys())[0] + consolidated_name = f"{group_name}_time" + self.logger.info( + "Group %s: Single time coordinate '%s' - using as '%s'", + group_name, + time_name, + consolidated_name, + ) + return { + "consolidated_time_name": consolidated_name, + "consolidated_time_data": ds[time_name], + "time_coord_mapping": {time_name: consolidated_name}, + } + + # Multiple time coordinates - check if they're identical + time_arrays = list(time_vars.values()) + first_time = time_arrays[0] + first_time_name = list(time_vars.keys())[0] + + all_identical = True + for i, (_name, time_array) in enumerate(time_vars.items()): + if i == 0: + continue # Skip first one (reference) + + # Compare sizes first + if len(time_array) != len(first_time): + all_identical = False + break + + # Compare values with tolerance + try: + if not np.allclose(time_array.values, first_time.values, atol=1e-6): + all_identical = False + break + except TypeError: + # Handle datetime arrays + if not np.array_equal(time_array.values, first_time.values): + all_identical = False + break + + if all_identical: + # All time coordinates are identical - consolidate them + consolidated_name = f"{group_name}_time" + time_coord_mapping = dict.fromkeys(time_vars, consolidated_name) + + self.logger.info( + "Group %s: All %d time coordinates identical - consolidating to '%s'", + group_name, + 
len(time_vars), + consolidated_name, + ) + + return { + "consolidated_time_name": consolidated_name, + "consolidated_time_data": ds[first_time_name], + "time_coord_mapping": time_coord_mapping, + } + + # Time coordinates differ - keep them separate + time_coord_mapping = {name: f"{group_name}_{name.lower()}" for name in time_vars} + + self.logger.warning( + "Group %s: Time coordinates differ - keeping separate: %s", + group_name, + list(time_vars.keys()), + ) + + return { + "consolidated_time_name": None, + "consolidated_time_data": None, + "time_coord_mapping": time_coord_mapping, + } + def combine_groups(self): log_file = self.args.log_file src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) @@ -372,12 +466,12 @@ def combine_groups(self): for group_file in group_files: self.logger.info("Group file: %s", group_file.name) with xr.open_dataset(group_file, decode_cf=False) as ds: - # New group name is loawercase with underscores removed + # Group name to prepend variable names is lowercase with underscores removed group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() + time_info = self._consolidate_group_time_coords(ds, group_name) for orig_var in ds.variables: if orig_var.lower().endswith("time"): - self.logger.info("Skipping time variable: %s", orig_var) continue new_var = group_name + "_" + orig_var.lower() self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) @@ -388,20 +482,31 @@ def combine_groups(self): # Convert radians to degrees self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy() * 180.0 / np.pi, - coords=ds[orig_var].coords, - dims=ds[orig_var].dims, + dims=[time_info["time_coord_mapping"][ds[orig_var].dims[0]]], + coords=[ds[orig_var].get_index(orig_var + "_time")], ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() self.combined_nc[new_var].attrs["units"] = "degrees" - else: self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy(), - coords=ds[orig_var].coords, - 
dims=ds[orig_var].dims, + dims=[time_info["time_coord_mapping"][ds[orig_var].dims[0]]], + coords=[ds[orig_var].get_index(orig_var + "_time")], ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() + # Construct useful comment for consolidated time coordinate + if time_info["consolidated_time_name"] in self.combined_nc.variables: + mapping_info = ", ".join( + [ + f"{orig} -> {new}" + for orig, new in time_info["time_coord_mapping"].items() + ] + ) + self.combined_nc[time_info["consolidated_time_name"]].attrs["comment"] = ( + f"Consolidated time coordinate from: {mapping_info}" + ) + # Add nudged longitude and latitude variables to the combined_nc dataset try: nudged_longitude, nudged_latitude = self._nudge_pos() From e956d0330136d29ee6059e821baf4098ebc04447 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 11 Nov 2025 10:18:30 -0800 Subject: [PATCH 16/28] Only set units to radians for lat & lon in the / group. --- src/data/nc42netcdfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index fe3f286e..ed778f82 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -811,7 +811,7 @@ def _copy_variable_with_appropriate_time_filter( # noqa: C901, PLR0912 # Override any coordinates attribute in src with just the time coordinate dst_var.setncattr("coordinates", var_name + "_time") # Downstream process uses cf_xarray to recognize coordinates, add required attribute - if var_name.startswith(("longitude", "latitude")): + if src_group.name == "/" and var_name.startswith(("longitude", "latitude")): dst_var.setncattr("units", "radians") elif var_name.startswith("depth"): dst_var.setncattr("units", "meters") From 50376aa518c9667b17ad9986fc4dec88dbca4720 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 11 Nov 2025 12:09:15 -0800 Subject: [PATCH 17/28] Add required metadata for cf decoding, write intermediate file so that cf decoding can be used for nudge_positions(). 
--- src/data/combine.py | 64 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index f8c109e8..bcb96414 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -107,13 +107,13 @@ def global_metadata(self): metadata["featureType"] = "trajectory" try: metadata["time_coverage_start"] = str( - pd.to_datetime(self.combined_nc["depth_time"].values, unit="s")[0].isoformat(), + pd.to_datetime(self.combined_nc["universals_time"].values, unit="s")[0].isoformat(), ) except KeyError: - error_message = "No depth_time variable in combined_nc" + error_message = "No universals_time variable in combined_nc" raise EOFError(error_message) from None metadata["time_coverage_end"] = str( - pd.to_datetime(self.combined_nc["depth_time"].values, unit="s")[-1].isoformat(), + pd.to_datetime(self.combined_nc["universals_time"].values, unit="s")[-1].isoformat(), ) metadata["distribution_statement"] = "Any use requires prior approval from MBARI" metadata["license"] = metadata["distribution_statement"] @@ -332,19 +332,20 @@ def _biolume_process(self, sensor): ) def _nudge_pos(self, max_sec_diff_at_end=10): - """Apply linear nudges to underwater latitudes and longitudes so that - they match the surface gps positions. + """Match variables from lrauv processing to those needed by + AUV.nudged_positions() so that linear nudges to underwater dead reckoned + positions will match the GPS positions at the surface. 
""" try: - lon = self.combined_nc["universals_longitude"] * 180.0 / np.pi + lon = self.combined_nc["universals_longitude"] except KeyError: error_message = "No universals_longitude data in combined_nc" raise EOFError(error_message) from None - lat = self.combined_nc["universals_latitude"] * 180.0 / np.pi + lat = self.combined_nc["universals_latitude"] lon_fix = self.combined_nc["nal9602_longitude_fix"] lat_fix = self.combined_nc["nal9602_latitude_fix"] - # Use the shared function from AUV module + # Use the shared nudge_positions() function from AUV module lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( nav_longitude=lon, nav_latitude=lat, @@ -357,10 +358,6 @@ def _nudge_pos(self, max_sec_diff_at_end=10): create_plots=True, ) - # Store results in instance variables for compatibility - self.segment_count = segment_count - self.segment_minsum = segment_minsum - return lon_nudged, lat_nudged def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict: @@ -465,6 +462,7 @@ def combine_groups(self): self.combined_nc = xr.Dataset() for group_file in group_files: self.logger.info("Group file: %s", group_file.name) + # Open group file without decoding to have np.allclose work properly with xr.open_dataset(group_file, decode_cf=False) as ds: # Group name to prepend variable names is lowercase with underscores removed group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() @@ -474,15 +472,15 @@ def combine_groups(self): if orig_var.lower().endswith("time"): continue new_var = group_name + "_" + orig_var.lower() + dim_name = time_info["time_coord_mapping"][ds[orig_var].dims[0]] self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) if ( orig_var in ("latitude", "longitude") and ds[orig_var].attrs.get("units") == "radians" ): - # Convert radians to degrees self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy() * 180.0 / np.pi, - 
dims=[time_info["time_coord_mapping"][ds[orig_var].dims[0]]], + dims=[dim_name], coords=[ds[orig_var].get_index(orig_var + "_time")], ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() @@ -490,11 +488,17 @@ def combine_groups(self): else: self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy(), - dims=[time_info["time_coord_mapping"][ds[orig_var].dims[0]]], + dims=[dim_name], coords=[ds[orig_var].get_index(orig_var + "_time")], ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() + # Add metadata required for cf_xarray decoding + self.combined_nc[new_var].coords[dim_name].attrs["units"] = ( + "seconds since 1970-01-01T00:00:00Z" + ) + self.combined_nc[new_var].coords[dim_name].attrs["standard_name"] = "time" + # Construct useful comment for consolidated time coordinate if time_info["consolidated_time_name"] in self.combined_nc.variables: mapping_info = ", ".join( @@ -507,6 +511,12 @@ def combine_groups(self): f"Consolidated time coordinate from: {mapping_info}" ) + # Write out an intermediate netCDF file so that cf_xarray can decode + # the data properly for nudging positions + intermediate_file = self._intermediate_write_netcdf() + with xr.open_dataset(intermediate_file, decode_cf=True) as ds: + self.combined_nc = ds.load() + # Add nudged longitude and latitude variables to the combined_nc dataset try: nudged_longitude, nudged_latitude = self._nudge_pos() @@ -527,6 +537,30 @@ def combine_groups(self): "units": "degrees_north", "comment": "Dead reckoned latitude nudged to GPS positions", } + # Remove the intermediate file + Path(intermediate_file).unlink() + + def _intermediate_write_netcdf(self) -> None: + """Write out an intermediate combined netCDF file so that data can be + read using decode_cf=True for nudge_positions() to work with cf accessors.""" + log_file = self.args.log_file + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined_intermediate.nc") + + 
self.combined_nc.attrs = self.global_metadata() + self.logger.info("Writing intermediate combined group data to %s", out_fn) + if Path(out_fn).exists(): + Path(out_fn).unlink() + self.combined_nc.to_netcdf(out_fn) + self.logger.info( + "Data variables written: %s", + ", ".join(sorted(self.combined_nc.variables)), + ) + self.logger.info( + "Wrote intermediate (_combined_intermediate.nc) netCDF file: %s", + out_fn, + ) + return out_fn def write_netcdf(self) -> None: log_file = self.args.log_file From 019bdb13b76cfb6bebf8c3b0cb0a81c3db15d8a1 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 11 Nov 2025 16:34:23 -0800 Subject: [PATCH 18/28] WIP: Add time coordinate(s) to combined_nc - still having problems in writing all the universals to the file. --- src/data/combine.py | 46 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index bcb96414..63f123a3 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -427,9 +427,8 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic time_coord_mapping = dict.fromkeys(time_vars, consolidated_name) self.logger.info( - "Group %s: All %d time coordinates identical - consolidating to '%s'", - group_name, - len(time_vars), + "%-65s %s", + f"Consoliding {len(time_vars)} coordinates to", consolidated_name, ) @@ -468,6 +467,39 @@ def combine_groups(self): group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() time_info = self._consolidate_group_time_coords(ds, group_name) + # Add time coordinate(s) to combined_nc + if time_info["consolidated_time_name"]: + self.logger.info( + "Adding consolidated time coordinate %-45s %s", + f"{time_info['consolidated_time_name']} as", + time_info["consolidated_time_name"], + ) + self.combined_nc[time_info["consolidated_time_name"]] = xr.DataArray( + time_info["consolidated_time_data"].to_numpy(), + dims=[time_info["consolidated_time_name"]], + 
coords={ + time_info["consolidated_time_name"]: time_info[ + "consolidated_time_data" + ].to_numpy() + }, + ) + self.combined_nc[time_info["consolidated_time_name"]].attrs = time_info[ + "consolidated_time_data" + ].attrs.copy() + else: + for orig_time_var, new_time_var in time_info["time_coord_mapping"].items(): + self.logger.info( + "Adding time coordinate %-58s %s", + f"{orig_time_var} as", + new_time_var, + ) + self.combined_nc[new_time_var] = xr.DataArray( + ds[orig_time_var].to_numpy(), + dims=[new_time_var], + coords={new_time_var: ds[orig_time_var].to_numpy()}, + ) + self.combined_nc[new_time_var].attrs = ds[orig_time_var].attrs.copy() + for orig_var in ds.variables: if orig_var.lower().endswith("time"): continue @@ -481,7 +513,9 @@ def combine_groups(self): self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy() * 180.0 / np.pi, dims=[dim_name], - coords=[ds[orig_var].get_index(orig_var + "_time")], + coords={ + dim_name: ds[orig_var].get_index(orig_var + "_time").to_numpy() + }, ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() self.combined_nc[new_var].attrs["units"] = "degrees" @@ -489,7 +523,9 @@ def combine_groups(self): self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy(), dims=[dim_name], - coords=[ds[orig_var].get_index(orig_var + "_time")], + coords={ + dim_name: ds[orig_var].get_index(orig_var + "_time").to_numpy() + }, ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() From d34cf2b5ac60fc8aed67cf3db9019b17f4d7e0b7 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 12 Nov 2025 13:17:19 -0800 Subject: [PATCH 19/28] Factor out several small methods to make combine_groups() less complex, add more log statements. 
--- src/data/combine.py | 317 ++++++++++++++++++++++++++------------------ 1 file changed, 188 insertions(+), 129 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 63f123a3..05a48307 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -331,35 +331,6 @@ def _biolume_process(self, sensor): set_to_nan=True, ) - def _nudge_pos(self, max_sec_diff_at_end=10): - """Match variables from lrauv processing to those needed by - AUV.nudged_positions() so that linear nudges to underwater dead reckoned - positions will match the GPS positions at the surface. - """ - try: - lon = self.combined_nc["universals_longitude"] - except KeyError: - error_message = "No universals_longitude data in combined_nc" - raise EOFError(error_message) from None - lat = self.combined_nc["universals_latitude"] - lon_fix = self.combined_nc["nal9602_longitude_fix"] - lat_fix = self.combined_nc["nal9602_latitude_fix"] - - # Use the shared nudge_positions() function from AUV module - lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( - nav_longitude=lon, - nav_latitude=lat, - gps_longitude=lon_fix, - gps_latitude=lat_fix, - logger=self.logger, - auv_name="", - mission="", - max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=True, - ) - - return lon_nudged, lat_nudged - def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict: """Analyze and consolidate time coordinates for a group. 
@@ -408,17 +379,37 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic # Compare sizes first if len(time_array) != len(first_time): all_identical = False + self.logger.debug( + "Group %s: Time coordinate '%s' length %d differs from '%s' length %d", + group_name, + _name, + len(time_array), + first_time_name, + len(first_time), + ) break # Compare values with tolerance try: if not np.allclose(time_array.values, first_time.values, atol=1e-6): all_identical = False + self.logger.debug( + "Group %s: Time coordinate '%s' values differ from '%s'", + group_name, + _name, + first_time_name, + ) break except TypeError: # Handle datetime arrays if not np.array_equal(time_array.values, first_time.values): all_identical = False + self.logger.debug( + "Group %s: Time coordinate '%s' values differ from '%s'", + group_name, + _name, + first_time_name, + ) break if all_identical: @@ -453,112 +444,146 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic "time_coord_mapping": time_coord_mapping, } - def combine_groups(self): - log_file = self.args.log_file - src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) - self.summary_fields = set() - self.combined_nc = xr.Dataset() - for group_file in group_files: - self.logger.info("Group file: %s", group_file.name) - # Open group file without decoding to have np.allclose work properly - with xr.open_dataset(group_file, decode_cf=False) as ds: - # Group name to prepend variable names is lowercase with underscores removed - group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() - time_info = self._consolidate_group_time_coords(ds, group_name) + def _add_time_coordinates_to_combined(self, time_info: dict, ds: xr.Dataset) -> None: + """Add time coordinates to the combined dataset.""" + if time_info["consolidated_time_name"]: + self._add_consolidated_time_coordinate(time_info) + else: 
+ self._add_separate_time_coordinates(time_info, ds) - # Add time coordinate(s) to combined_nc - if time_info["consolidated_time_name"]: - self.logger.info( - "Adding consolidated time coordinate %-45s %s", - f"{time_info['consolidated_time_name']} as", - time_info["consolidated_time_name"], - ) - self.combined_nc[time_info["consolidated_time_name"]] = xr.DataArray( - time_info["consolidated_time_data"].to_numpy(), - dims=[time_info["consolidated_time_name"]], - coords={ - time_info["consolidated_time_name"]: time_info[ - "consolidated_time_data" - ].to_numpy() - }, - ) - self.combined_nc[time_info["consolidated_time_name"]].attrs = time_info[ - "consolidated_time_data" - ].attrs.copy() - else: - for orig_time_var, new_time_var in time_info["time_coord_mapping"].items(): - self.logger.info( - "Adding time coordinate %-58s %s", - f"{orig_time_var} as", - new_time_var, - ) - self.combined_nc[new_time_var] = xr.DataArray( - ds[orig_time_var].to_numpy(), - dims=[new_time_var], - coords={new_time_var: ds[orig_time_var].to_numpy()}, - ) - self.combined_nc[new_time_var].attrs = ds[orig_time_var].attrs.copy() - - for orig_var in ds.variables: - if orig_var.lower().endswith("time"): - continue - new_var = group_name + "_" + orig_var.lower() - dim_name = time_info["time_coord_mapping"][ds[orig_var].dims[0]] - self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) - if ( - orig_var in ("latitude", "longitude") - and ds[orig_var].attrs.get("units") == "radians" - ): - self.combined_nc[new_var] = xr.DataArray( - ds[orig_var].to_numpy() * 180.0 / np.pi, - dims=[dim_name], - coords={ - dim_name: ds[orig_var].get_index(orig_var + "_time").to_numpy() - }, - ) - self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() - self.combined_nc[new_var].attrs["units"] = "degrees" - else: - self.combined_nc[new_var] = xr.DataArray( - ds[orig_var].to_numpy(), - dims=[dim_name], - coords={ - dim_name: ds[orig_var].get_index(orig_var + "_time").to_numpy() - }, - ) - 
self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() + def _add_consolidated_time_coordinate(self, time_info: dict) -> None: + """Add a consolidated time coordinate to the combined dataset.""" + time_name = time_info["consolidated_time_name"] + self.logger.info( + "Adding consolidated time coordinate %-45s %s", + f"{time_name} as", + time_name, + ) + self.combined_nc[time_name] = xr.DataArray( + time_info["consolidated_time_data"].to_numpy(), + dims=[time_name], + coords={time_name: time_info["consolidated_time_data"].to_numpy()}, + ) + self.combined_nc[time_name].attrs = time_info["consolidated_time_data"].attrs.copy() - # Add metadata required for cf_xarray decoding - self.combined_nc[new_var].coords[dim_name].attrs["units"] = ( - "seconds since 1970-01-01T00:00:00Z" - ) - self.combined_nc[new_var].coords[dim_name].attrs["standard_name"] = "time" - - # Construct useful comment for consolidated time coordinate - if time_info["consolidated_time_name"] in self.combined_nc.variables: - mapping_info = ", ".join( - [ - f"{orig} -> {new}" - for orig, new in time_info["time_coord_mapping"].items() - ] - ) - self.combined_nc[time_info["consolidated_time_name"]].attrs["comment"] = ( - f"Consolidated time coordinate from: {mapping_info}" - ) + def _add_separate_time_coordinates(self, time_info: dict, ds: xr.Dataset) -> None: + """Add separate time coordinates to the combined dataset.""" + for orig_time_var, new_time_var in time_info["time_coord_mapping"].items(): + self.logger.info( + "Adding time coordinate %-58s %s", + f"{orig_time_var} as", + new_time_var, + ) + self.combined_nc[new_time_var] = xr.DataArray( + ds[orig_time_var].to_numpy(), + dims=[new_time_var], + coords={new_time_var: ds[orig_time_var].to_numpy()}, + ) + self.combined_nc[new_time_var].attrs = ds[orig_time_var].attrs.copy() + + def _get_time_coordinate_data(self, time_info: dict, ds: xr.Dataset, orig_time_dim: str): + """Get the appropriate time coordinate data for a variable.""" + if 
time_info["consolidated_time_name"]: + return time_info["consolidated_time_data"].to_numpy() + return ds[orig_time_dim].to_numpy() + + def _create_data_array_for_variable( + self, ds: xr.Dataset, orig_var: str, dim_name: str, time_coord_data + ) -> xr.DataArray: + """Create a DataArray for a variable, handling unit conversions.""" + if orig_var in ("latitude", "longitude") and ds[orig_var].attrs.get("units") == "radians": + data_array = xr.DataArray( + ds[orig_var].to_numpy() * 180.0 / np.pi, + dims=[dim_name], + coords={dim_name: time_coord_data}, + ) + data_array.attrs = ds[orig_var].attrs.copy() + data_array.attrs["units"] = "degrees" + else: + data_array = xr.DataArray( + ds[orig_var].to_numpy(), + dims=[dim_name], + coords={dim_name: time_coord_data}, + ) + data_array.attrs = ds[orig_var].attrs.copy() + return data_array - # Write out an intermediate netCDF file so that cf_xarray can decode - # the data properly for nudging positions - intermediate_file = self._intermediate_write_netcdf() - with xr.open_dataset(intermediate_file, decode_cf=True) as ds: - self.combined_nc = ds.load() + def _add_time_metadata_to_variable(self, var_name: str, dim_name: str) -> None: + """Add required time metadata for cf_xarray decoding.""" + self.combined_nc[var_name].coords[dim_name].attrs["units"] = ( + "seconds since 1970-01-01T00:00:00Z" + ) + self.combined_nc[var_name].coords[dim_name].attrs["standard_name"] = "time" + + def _process_group_variables(self, ds: xr.Dataset, group_name: str, time_info: dict) -> None: + """Process all data variables in a group.""" + for orig_var in ds.variables: + if orig_var.lower().endswith("time"): + continue + + # Skip scalar variables (no dimensions) + if len(ds[orig_var].dims) == 0: + self.logger.debug("Skipping scalar variable: %s", orig_var) + continue + + new_var = group_name + "_" + orig_var.lower() - # Add nudged longitude and latitude variables to the combined_nc dataset + # Get the original time dimension for this variable + 
orig_time_dim = ds[orig_var].dims[0] # Assuming first dim is time + + # Check if this dimension has a mapping + if orig_time_dim not in time_info["time_coord_mapping"]: + self.logger.warning( + "No time mapping found for %s dimension %s", orig_var, orig_time_dim + ) + continue + + dim_name = time_info["time_coord_mapping"][orig_time_dim] + time_coord_data = self._get_time_coordinate_data(time_info, ds, orig_time_dim) + + self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) + + # Create the data array + self.combined_nc[new_var] = self._create_data_array_for_variable( + ds, orig_var, dim_name, time_coord_data + ) + + # Add time metadata + self._add_time_metadata_to_variable(new_var, dim_name) + + def _add_consolidation_comment(self, time_info: dict) -> None: + """Add a comment documenting time coordinate consolidation.""" + if time_info["consolidated_time_name"] in self.combined_nc.variables: + mapping_info = ", ".join( + [f"{orig} -> {new}" for orig, new in time_info["time_coord_mapping"].items()] + ) + self.combined_nc[time_info["consolidated_time_name"]].attrs["comment"] = ( + f"Consolidated time coordinate from: {mapping_info}" + ) + + def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: + """Add nudged longitude and latitude variables to the combined dataset.""" try: - nudged_longitude, nudged_latitude = self._nudge_pos() + nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( + nav_longitude=self.combined_nc["universals_longitude"], + nav_latitude=self.combined_nc["universals_latitude"], + gps_longitude=self.combined_nc["nal9602_longitude_fix"], + gps_latitude=self.combined_nc["nal9602_latitude_fix"], + logger=self.logger, + auv_name="", + mission="", + max_sec_diff_at_end=max_sec_diff_at_end, + create_plots=True, + ) except ValueError as e: self.logger.error("Nudging positions failed: %s", e) # noqa: TRY400 return + + self.logger.info( + "nudge_positions created %d segments with 
segment_minsum = %f", + segment_count, + segment_minsum, + ) self.combined_nc["nudged_longitude"] = nudged_longitude self.combined_nc["nudged_longitude"].attrs = { "long_name": "Nudged Longitude", @@ -573,8 +598,42 @@ def combine_groups(self): "units": "degrees_north", "comment": "Dead reckoned latitude nudged to GPS positions", } - # Remove the intermediate file - Path(intermediate_file).unlink() + + def combine_groups(self): + """Combine group files into a single NetCDF dataset with consolidated time coordinates.""" + log_file = self.args.log_file + src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) + self.summary_fields = set() + self.combined_nc = xr.Dataset() + + for group_file in group_files: + self.logger.info("Group file: %s", group_file.name) + # Open group file without decoding to have np.allclose work properly + with xr.open_dataset(group_file, decode_cf=False) as ds: + # Group name to prepend variable names is lowercase with underscores removed + group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() + time_info = self._consolidate_group_time_coords(ds, group_name) + + # Add time coordinate(s) to combined dataset + self._add_time_coordinates_to_combined(time_info, ds) + + # Process all data variables in the group + self._process_group_variables(ds, group_name, time_info) + + # Add consolidation comment if applicable + self._add_consolidation_comment(time_info) + + # Write intermediate file for cf_xarray decoding + intermediate_file = self._intermediate_write_netcdf() + with xr.open_dataset(intermediate_file, decode_cf=True) as ds: + self.combined_nc = ds.load() + + # Add nudged coordinates + self._add_nudged_coordinates() + + # Clean up intermediate file + ##Path(intermediate_file).unlink() def _intermediate_write_netcdf(self) -> None: """Write out an intermediate combined netCDF file so that data can be From 38be49be3b04f1e49ed0e286aff6ca9e78143e2e Mon 
Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 12 Nov 2025 13:33:45 -0800 Subject: [PATCH 20/28] Shift dead reckoned nav data by 1 for shared nudge_positions() to work. Use --plot option. --- src/data/combine.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 05a48307..0f04e03d 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -565,15 +565,16 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: """Add nudged longitude and latitude variables to the combined dataset.""" try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( - nav_longitude=self.combined_nc["universals_longitude"], - nav_latitude=self.combined_nc["universals_latitude"], + # For LRAUV data the nav positions are shifted by 1 to align with GPS fixes + nav_longitude=self.combined_nc["universals_longitude"].shift(universals_time=1), + nav_latitude=self.combined_nc["universals_latitude"].shift(universals_time=1), gps_longitude=self.combined_nc["nal9602_longitude_fix"], gps_latitude=self.combined_nc["nal9602_latitude_fix"], logger=self.logger, auv_name="", mission="", max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=True, + create_plots=self.args.plot, ) except ValueError as e: self.logger.error("Nudging positions failed: %s", e) # noqa: TRY400 @@ -691,12 +692,6 @@ def process_command_line(self): description=__doc__, epilog=examples, ) - - parser.add_argument( - "--noinput", - action="store_true", - help="Execute without asking for a response, e.g. to not ask to re-download file", - ) parser.add_argument( "--log_file", action="store", @@ -708,10 +703,8 @@ def process_command_line(self): ) parser.add_argument( "--plot", - action="store", - help="Create intermediate plots" - " to validate data operations. Use first to plot " - " points, e.g. first2000. 
Program blocks upon show.", + action="store_true", + help="Create intermediate plot(s) to help validate processing", ) parser.add_argument( "-v", From 3c77dfd26595571f007c5f8bfc5590822a2df4d0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 12 Nov 2025 16:47:41 -0800 Subject: [PATCH 21/28] WIP: Making work for data from *_combined.nc --- src/data/align.py | 289 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 273 insertions(+), 16 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index f8603424..bab78d52 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -14,6 +14,7 @@ import argparse import logging +import os import re import sys import time @@ -42,6 +43,10 @@ class InvalidCalFile(Exception): pass +class InvalidCombinedFile(Exception): + pass + + class Align_NetCDF: logger = logging.getLogger(__name__) _handler = logging.StreamHandler() @@ -53,6 +58,8 @@ def global_metadata(self): """Use instance variables to return a dictionary of metadata specific for the data that are written """ + # Try to get actual host name, fall back to container name + actual_hostname = os.getenv("HOST_NAME", gethostname()) repo = git.Repo(search_parent_directories=True) try: gitcommit = repo.head.object.hexsha @@ -94,17 +101,30 @@ def global_metadata(self): metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." 
metadata["history"] = f"Created by {self.commandline} on {iso_now}" - metadata["title"] = ( - f"Calibrated and aligned AUV sensor data from" - f" {self.args.auv_name} mission {self.args.mission}" - ) - from_data = "calibrated data" - metadata["source"] = ( - f"MBARI Dorado-class AUV data produced from {from_data}" - f" with execution of '{self.commandline}' at {iso_now} on" - f" host {gethostname()} using git commit {gitcommit} from" - f" software at 'https://github.com/mbari-org/auv-python'" - ) + if self.args.auv_name and self.args.mission: + metadata["title"] = ( + f"Calibrated and aligned AUV sensor data from" + f" {self.args.auv_name} mission {self.args.mission}" + ) + from_data = "calibrated data" + metadata["source"] = ( + f"MBARI Dorado-class AUV data produced from {from_data}" + f" with execution of '{self.commandline}' at {iso_now} on" + f" host {actual_hostname} using git commit {gitcommit} from" + f" software at 'https://github.com/mbari-org/auv-python'" + ) + else: + metadata["title"] = ( + f"Combined and aligned LRAUV instrument data from" + f" log file {Path(self.args.log_file).name}" + ) + from_data = "combined data" + metadata["source"] = ( + f"MBARI Long Range AUV data produced from {from_data}" + f" with execution of '{self.commandline}' at {iso_now} on" + f" host {actual_hostname} using git commit {gitcommit} from" + f" software at 'https://github.com/mbari-org/auv-python'" + ) metadata["summary"] = ( "Observational oceanographic data obtained from an Autonomous" " Underwater Vehicle mission with measurements at" @@ -115,7 +135,7 @@ def global_metadata(self): # Append location of original data files to summary matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", - self.calibrated_nc.attrs["summary"], + self.combined_nc.attrs["summary"], ) if matches: metadata["summary"] += " " + matches.group(1) @@ -334,6 +354,229 @@ def process_cal(self, vehicle: str = "", name: str = "", log_file: str = "") -> return netcdfs_dir + def 
process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR0915 + """Process combined LRAUV data from *_combined.nc files created by combine.py""" + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(log_file).parent}") + src_file = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") + + self.combined_nc = xr.open_dataset(src_file) + self.logger.info("Processing %s", src_file) + self.aligned_nc = xr.Dataset() + self.min_time = datetime.now(UTC) + self.max_time = datetime(1970, 1, 1, tzinfo=UTC) + self.min_depth = np.inf + self.max_depth = -np.inf + self.min_lat = np.inf + self.max_lat = -np.inf + self.min_lon = np.inf + self.max_lon = -np.inf + + # Find navigation coordinates from combined data - must be from universals group + nav_coords = {} + for coord_type in ["longitude", "latitude", "depth", "time"]: + coord_var = f"universals_{coord_type}" + if coord_var not in self.combined_nc: + error_message = ( + f"Required universals coordinate {coord_var} not found in {src_file}" + ) + raise InvalidCombinedFile(error_message) + nav_coords[coord_type] = coord_var + self.logger.info("Found navigation coordinate: %s", coord_var) + + # Create interpolators for navigation coordinates + try: + lat_interp = interp1d( + self.combined_nc[nav_coords["latitude"]] + .get_index("universals_time") + .view(np.int64) + .tolist(), + self.combined_nc[nav_coords["latitude"]].values, + fill_value=( + self.combined_nc[nav_coords["latitude"]][0], + self.combined_nc[nav_coords["latitude"]][-1], + ), + bounds_error=False, + ) + + lon_interp = interp1d( + self.combined_nc[nav_coords["longitude"]] + .get_index("universals_time") + .view(np.int64) + .tolist(), + self.combined_nc[nav_coords["longitude"]].values, + fill_value=( + self.combined_nc[nav_coords["longitude"]][0], + self.combined_nc[nav_coords["longitude"]][-1], + ), + bounds_error=False, + ) + + depth_interp = interp1d( + self.combined_nc[nav_coords["depth"]] + .get_index("universals_time") + .view(np.int64) + .tolist(), + 
self.combined_nc[nav_coords["depth"]].values, + fill_value=( + self.combined_nc[nav_coords["depth"]][0], + self.combined_nc[nav_coords["depth"]][-1], + ), + bounds_error=False, + ) + + except KeyError as e: + error_message = f"Missing navigation data in {src_file}: {e}" + raise InvalidCombinedFile(error_message) from e + except ValueError as e: + error_message = f"Cannot interpolate navigation coordinates: {e}" + raise InvalidCombinedFile(error_message) from e + + # Process group-based variables (skip coordinate variables) + for variable in self.combined_nc: + # Skip time coordinate variables + if variable.endswith("_time"): + continue + + # Skip the navigation coordinate variables themselves + if variable in nav_coords.values(): + continue + + # Extract group name from variable (e.g., "ctd_seabird_salinity" -> "ctd_seabird") + var_parts = variable.split("_") + if len(var_parts) < 2: # noqa: PLR2004 + self.logger.debug("Skipping variable with unexpected name format: %s", variable) + continue + + # Try to find the corresponding time coordinate + # Look for pattern: group_name + "_time" + possible_time_coords = [] + for i in range(len(var_parts)): + group_candidate = "_".join(var_parts[: i + 1]) + time_coord_candidate = f"{group_candidate}_time" + if time_coord_candidate in self.combined_nc: + possible_time_coords.append((group_candidate, time_coord_candidate)) + + if not possible_time_coords: + self.logger.warning("No time coordinate found for variable: %s", variable) + continue + + # Use the longest matching group name (most specific) + group_name, timevar = max(possible_time_coords, key=lambda x: len(x[0])) + self.logger.debug( + "Processing %s with group %s and time %s", variable, group_name, timevar + ) + + # Copy the original variable + self.aligned_nc[variable] = self.combined_nc[variable] + + # Get the time index for this variable + var_time = self.aligned_nc[variable].get_index(timevar).view(np.int64).tolist() + + # Calculate sampling rate + sample_rate = 
np.round( + 1.0 / (np.mean(np.diff(self.combined_nc[timevar])) / np.timedelta64(1, "s")), + decimals=2, + ) + + # Create aligned variable with proper attributes + self.aligned_nc[variable] = xr.DataArray( + self.combined_nc[variable].values, + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=variable, + ) + self.aligned_nc[variable].attrs = self.combined_nc[variable].attrs + self.aligned_nc[variable].attrs["coordinates"] = ( + f"{group_name}_time {group_name}_depth {group_name}_latitude {group_name}_longitude" + ) + self.logger.info("%s: instrument_sample_rate_hz = %.2f", variable, sample_rate) + self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = sample_rate + + # Create interpolated coordinate variables for this group + coord_names = ["depth", "latitude", "longitude"] + coord_interps = [depth_interp, lat_interp, lon_interp] + coord_sources = [nav_coords["depth"], nav_coords["latitude"], nav_coords["longitude"]] + + for coord_name, coord_interp, coord_source in zip( + coord_names, coord_interps, coord_sources, strict=True + ): + coord_var_name = f"{group_name}_{coord_name}" + + self.aligned_nc[coord_var_name] = xr.DataArray( + coord_interp(var_time).astype(np.float64).tolist(), + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=coord_var_name, + ) + + # Copy attributes from source coordinate + if coord_source in self.combined_nc: + self.aligned_nc[coord_var_name].attrs = self.combined_nc[coord_source].attrs + + # Update attributes + self.aligned_nc[coord_var_name].attrs["long_name"] = coord_name.title() + self.aligned_nc[coord_var_name].attrs["instrument_sample_rate_hz"] = sample_rate + + if coord_name in ["latitude", "longitude"]: + self.aligned_nc[coord_var_name].attrs["comment"] = ( + self.aligned_nc[coord_var_name].attrs.get("comment", "") + + f". Variable {coord_source} from {src_file} file linearly" + f" interpolated onto {group_name} time values." 
+ ) + + # Update spatial temporal bounds for global metadata + if pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC) < pd.to_datetime( + self.min_time + ): + self.min_time = pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC) + if pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize( + UTC + ) > pd.to_datetime(self.max_time): + self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC) + + # Update bounds using the interpolated coordinates + depth_coord = f"{group_name}_depth" + lat_coord = f"{group_name}_latitude" + lon_coord = f"{group_name}_longitude" + + if self.aligned_nc[depth_coord].min() < self.min_depth: + self.min_depth = self.aligned_nc[depth_coord].min().to_numpy() + if self.aligned_nc[depth_coord].max() > self.max_depth: + self.max_depth = self.aligned_nc[depth_coord].max().to_numpy() + if self.aligned_nc[lat_coord].min() < self.min_lat: + self.min_lat = self.aligned_nc[lat_coord].min().to_numpy() + if self.aligned_nc[lat_coord].max() > self.max_lat: + self.max_lat = self.aligned_nc[lat_coord].max().to_numpy() + if self.aligned_nc[lon_coord].min() < self.min_lon: + self.min_lon = self.aligned_nc[lon_coord].min().to_numpy() + if self.aligned_nc[lon_coord].max() > self.max_lon: + self.max_lon = self.aligned_nc[lon_coord].max().to_numpy() + + return netcdfs_dir + + def write_combined_netcdf( + self, netcdfs_dir, vehicle: str = "", name: str = "", log_file: str = "" + ) -> None: + """Write aligned combined data to NetCDF file""" + if log_file: + # For LRAUV log files, use the log file stem for output name + out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_align.nc") + else: + name = name or self.args.mission + vehicle = vehicle or self.args.auv_name + out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_align.nc") + + self.aligned_nc.attrs = self.global_metadata() + self.logger.info("Writing aligned combined data to %s", out_fn) + if out_fn.exists(): + self.logger.debug("Removing existing 
file %s", out_fn) + out_fn.unlink() + self.aligned_nc.to_netcdf(out_fn) + self.logger.info( + "Data variables written: %s", + ", ".join(sorted(self.aligned_nc.variables)), + ) + def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: name = name or self.args.mission vehicle = vehicle or self.args.auv_name @@ -354,6 +597,13 @@ def process_command_line(self): examples += " Align calibrated data for some missions:\n" examples += " " + sys.argv[0] + " --mission 2020.064.10\n" examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n" + examples += " Align combined LRAUV data:\n" + examples += ( + " " + + sys.argv[0] + + " --log_file brizo/missionlogs/2025/20250909_20250915/20250914T080941/" + + "202509140809_202509150109.nc4\n" + ) parser = argparse.ArgumentParser( formatter_class=RawTextHelpFormatter, @@ -415,10 +665,17 @@ def process_command_line(self): align_netcdf = Align_NetCDF() align_netcdf.process_command_line() p_start = time.time() - if align_netcdf.args.auv_name and align_netcdf.args.mission: + + if align_netcdf.args.log_file: + # Process combined LRAUV data using log_file + netcdf_dir = align_netcdf.process_combined(log_file=align_netcdf.args.log_file) + align_netcdf.write_combined_netcdf(netcdf_dir, log_file=align_netcdf.args.log_file) + elif align_netcdf.args.auv_name and align_netcdf.args.mission: + # Process calibrated data using auv_name and mission netcdf_dir = align_netcdf.process_cal() align_netcdf.write_netcdf(netcdf_dir) - elif align_netcdf.args.log_file: - netcdf_dir = align_netcdf.process_cal(log_file=align_netcdf.args.log_file) - align_netcdf.write_netcdf(netcdf_dir) + else: + align_netcdf.logger.error("Must provide either --log_file or both --auv_name and --mission") + sys.exit(1) + align_netcdf.logger.info("Time to process: %.2f seconds", (time.time() - p_start)) From a8eb98f1b0f2992b56340d7e23fb7188e3e6d336 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:44:21 -0800 Subject: 
[PATCH 22/28] Update numbers for tests to pass locally. --- src/data/test_process_dorado.py | 4 ++-- src/data/test_process_i2map.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 90ec047b..a729848e 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -33,7 +33,7 @@ def test_process_dorado(complete_dorado_processing): # update the expected size here. EXPECTED_SIZE_GITHUB = 621286 EXPECTED_SIZE_ACT = 621298 - EXPECTED_SIZE_LOCAL = 621286 + EXPECTED_SIZE_LOCAL = 621452 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB # noqa: S101 @@ -52,7 +52,7 @@ def test_process_dorado(complete_dorado_processing): # Check that the MD5 hash has not changed EXPECTED_MD5_GITHUB = "9f3f9e2e5abed08692ddb233dec0d0ac" EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" - EXPECTED_MD5_LOCAL = "6ecb2229b00835055619e982fe9d5023" + EXPECTED_MD5_LOCAL = "9137be5a2ed840cfca94a723285355ec" if str(proc.args.base_path).startswith("/home/runner"): # The MD5 hash is different in GitHub Actions, maybe due to different metadata assert hashlib.md5(open(nc_file, "rb").read()).hexdigest() == EXPECTED_MD5_GITHUB # noqa: PTH123, S101, S324, SIM115 diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index e2f6cb05..82fec722 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -32,7 +32,7 @@ def test_process_i2map(complete_i2map_processing): # update the expected size here. 
EXPECTED_SIZE_GITHUB = 58832 EXPECTED_SIZE_ACT = 58816 - EXPECTED_SIZE_LOCAL = 58884 + EXPECTED_SIZE_LOCAL = 59042 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB # noqa: S101 From 71445cecf0118e377491dfdd89be4e600e903795 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:45:04 -0800 Subject: [PATCH 23/28] Look for upstream summary metadata in the correct data member. --- src/data/align.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index bab78d52..7e69109a 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -113,7 +113,7 @@ def global_metadata(self): f" host {actual_hostname} using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) - else: + elif self.args.log_file: metadata["title"] = ( f"Combined and aligned LRAUV instrument data from" f" log file {Path(self.args.log_file).name}" @@ -133,10 +133,16 @@ def global_metadata(self): " software." ) # Append location of original data files to summary - matches = re.search( - "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", - self.combined_nc.attrs["summary"], - ) + if self.args.auv_name and self.args.mission: + matches = re.search( + "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", + self.calibrated_nc.attrs["summary"], + ) + elif self.args.log_file: + matches = re.search( + "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", + self.combined_nc.attrs["summary"], + ) if matches: metadata["summary"] += " " + matches.group(1) metadata["comment"] = ( From 794eed82ac96794329dff3726089182d60b5a00c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:45:43 -0800 Subject: [PATCH 24/28] Set create_plots to False - change back to True for debugging. 
--- src/data/calibrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 2cdc8941..c9e735f0 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -1676,7 +1676,7 @@ def _nudge_pos(self, max_sec_diff_at_end=10): auv_name=self.args.auv_name, mission=self.args.mission, max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=True, + create_plots=False, ) # Store results in instance variables for compatibility From d389dc147b82e79f89f523780d658d51ccfc0ff3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:46:37 -0800 Subject: [PATCH 25/28] Add setting of HOST_NAME environment variable for more meaningful metadata. --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 81417927..4861c407 100644 --- a/README.md +++ b/README.md @@ -103,11 +103,11 @@ First time use with Docker on a server using a service account: * git clone git@github.com:mbari-org/auv-python.git * cd auv-python * Create a .env file in `/opt/auv-python` with the following contents: - `M3_VOL=` - `AUVCTD_VOL=` - `CALIBRATION_VOL=` - `WORK_VOL=/data` - + `M3_VOL=` + `AUVCTD_VOL=` + `CALIBRATION_VOL=` + `WORK_VOL=/data` + `HOST_NAME=` After installation and when logging into the server again mission data can be processed thusly: * Setting up environment and printing help message: `sudo -u docker_user -i` From 8643e7df4f0f7701e8445a878e5c6c9c0f9e95c3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:50:52 -0800 Subject: [PATCH 26/28] Update numbers for tests to pass in Actions. 
--- src/data/test_process_dorado.py | 2 +- src/data/test_process_i2map.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index a729848e..56c73b58 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -31,7 +31,7 @@ def test_process_dorado(complete_dorado_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 621286 + EXPECTED_SIZE_GITHUB = 59042 EXPECTED_SIZE_ACT = 621298 EXPECTED_SIZE_LOCAL = 621452 if str(proc.args.base_path).startswith("/home/runner"): diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index 82fec722..66508695 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 58832 + EXPECTED_SIZE_GITHUB = 58942 EXPECTED_SIZE_ACT = 58816 EXPECTED_SIZE_LOCAL = 59042 if str(proc.args.base_path).startswith("/home/runner"): From 2da3e11aa1aa5360c7b7b7942cf1bda8464f6373 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 12:00:07 -0800 Subject: [PATCH 27/28] Try again with EXPECTED_SIZE_GITHUB. --- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 56c73b58..ffd8cb58 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -31,7 +31,7 @@ def test_process_dorado(complete_dorado_processing): # but it will alert us if a code change unexpectedly changes the file size. 
# If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 59042 + EXPECTED_SIZE_GITHUB = 621404 EXPECTED_SIZE_ACT = 621298 EXPECTED_SIZE_LOCAL = 621452 if str(proc.args.base_path).startswith("/home/runner"): From fbe620a2decff5c163a4623488061e6c572e20bb Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 12:02:52 -0800 Subject: [PATCH 28/28] Update EXPECTED_MD5_GITHUB. --- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index ffd8cb58..d368b183 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "9f3f9e2e5abed08692ddb233dec0d0ac" + EXPECTED_MD5_GITHUB = "3bab0300e575c1d752a35f49e49e340e" EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" EXPECTED_MD5_LOCAL = "9137be5a2ed840cfca94a723285355ec" if str(proc.args.base_path).startswith("/home/runner"):