From ebaca0fe45918f9e4dae79c66fe4a735e5f3069f Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 8 Oct 2025 17:00:00 -0700 Subject: [PATCH 001/121] Add placeholder LRAUV_WORKFLOW.md file and rename original to DORADO_WORKFLOW.md. --- WORKFLOW.md => DORADO_WORKFLOW.md | 4 +- LRAUV_WORKFLOW.md | 65 +++++++++++++++++++++++++++++++ TROUBLESHOOTING.md | 2 +- notebooks/README.md | 2 +- src/data/dorado_info.py | 2 +- 5 files changed, 70 insertions(+), 5 deletions(-) rename WORKFLOW.md => DORADO_WORKFLOW.md (98%) create mode 100644 LRAUV_WORKFLOW.md diff --git a/WORKFLOW.md b/DORADO_WORKFLOW.md similarity index 98% rename from WORKFLOW.md rename to DORADO_WORKFLOW.md index d946c635..9640f503 100644 --- a/WORKFLOW.md +++ b/DORADO_WORKFLOW.md @@ -1,6 +1,6 @@ -## Data Workflow +## Dorado Data Workflow -The sequence of steps to process data is as follows: +The sequence of steps to process Dorado data is as follows: logs2netcdfs.py → calibrate.py → align.py → resample.py → archive.py → plot.py diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md new file mode 100644 index 00000000..cd387442 --- /dev/null +++ b/LRAUV_WORKFLOW.md @@ -0,0 +1,65 @@ +## LRAUV Data Workflow + +The sequence of steps to process LRAUV data is as follows: + +TODO: Update this to reflect actual LRAUV Data Workflow. It should + mirror the Dorado Workflow, especially the last few steps. + + ??? → calibrate.py → align.py → resample.py → archive.py → plot.py + +Details of each step are described in the respective scripts and in the +description of output netCDF files below. The output file directory structure +on the local file system's work directory is as follows: + + ├── data + │ ├── auv_data + │ │ ├── <- e.g.: ahi, brizo, pontus, tethys, ... + │ │ │ ├── missionnetcdfs <- netCDF files + │ │ │ │ ├── <- e.g.: 2025/20250107_20250123/20250107T213313/202501072133_202501080049.nc4 + │ │ │ │ │ ├── <- .nc files for each instrument created + | | | | | | by ... + │ │ │ │ │ ├── <- .nc file with calibrated data created + | | | | | | by calibrate.py + │ │ │ │ │ ├── <- .nc file with all measurement variables + | | | | | | having associated coordinate variables + | | | | | | at original instrument sampling rate - + | | | | | | created by align.py + │ │ │ │ │ ├── <- .nc file with all measurement variables + resampled to a common time grid at n + Second intervals - created by resample.py + + ??? + + calibrate.py + Apply calibration coefficients to the original data. The calibrated data + are written to a new netCDF file in the missionnetcdfs/ + directory ending with _cal.nc. This step also includes nudging the + underwater portions of the navigation positions to the GPS fixes + done at the surface and applying pitch corrections to the sensor + depth for those sensors (instruments) for which offset values are + specified in SensorInfo. Some minimal QC is done in this step, namely + removal on non-monotonic times. The record variables in the netCDF + file have only their original coordinates, namely time associated with + them. + + align.py + Interpolate corrected lat/lon variables to the original sampling + intervals for each instrument's record variables. This format is + analogous to the .nc4 files produced by the LRAUV unserialize + process. These are the best files to use for the highest temporal + resolution of the data. Unlike the .nc4 files align.py's output files + use a naming convention rather than netCDF4 groups for each instrument. + + resample.py + Produce a netCDF file with all of the instrument's record variables + resampled to the same temporal interval. The coordinate variables are + also resampled to the same temporal interval and named with standard + depth, latitude, and longitude names. These are the best files to + use for loading data into STOQS and for analyses requiring all the + data to be on the same spatial temporal grid. + + archive.py + Copy the netCDF files to the archive directory. The archive directory + is initally in the AUVCTD share on atlas which is shared with the + data from the Dorado Gulper vehicle, but can also be on the M3 share + on thalassa near the original log data. diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index eee85c00..7d159a04 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -14,7 +14,7 @@ and make sure that it's the only entry in "process_dorado" that is uncommented. 2. From VS Code's Run and Debug panel select "process_dorado" and click the green Start Debugging play button. For data to be copied from the archive the smb://atlas.shore.mbari.org/AUVCTD share must be mounted on your computer. Primary development is done in MacOS where the local mount point is /Volumes. Archive volumes are hard-coded as literals in [src/data/process_dorado.py](https://github.com/mbari-org/auv-python/blob/fc3b58613761b295ab47907993c4d0eb0bceb197/src/data/process_dorado.py) and [src/data/process_i2map.py](https://github.com/mbari-org/auv-python/blob/fc3b58613761b295ab47907993c4d0eb0bceb197/src/data/process_i2map.py). These should be changed if you mount these volumes at a different location. -3. Mission log data will copied to your `auv-python/data/auv_data/` directory into subdirectories organized by vehicle name, mission, and processing step. Data will be processed as described in [WORKFLOW.md](WORKFLOW.md). A typical mission takes about 10 minutes to process. +3. Mission log data will copied to your `auv-python/data/auv_data/` directory into subdirectories organized by vehicle name, mission, and processing step. Data will be processed as described in [DORADO_WORKFLOW.md](DORADO_WORKFLOW.md). A typical mission takes about 10 minutes to process. 4. After all of the intermediate files are created any step of the workflow may be executed and debugged in VS Code. The `.vscode\launch.json` file has several example entries that can be modified for specific debugging purposes via the menu in the Run and Debug panel. diff --git a/notebooks/README.md b/notebooks/README.md index bb952fed..01719f2b 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -1,5 +1,5 @@ The Notebooks in this directory are intended to be used to examine the data -generated by each of the steps described in the [workflow]("../WORKFLOW.md"): +generated by each of the steps described in the [workflow]("../DORADO_WORKFLOW.md"): logs2netcdfs.py → calibrate.py → align.py → resample.py → archive.py → 1.x 2.x 3.x 4.x 5.x 6.x diff --git a/src/data/dorado_info.py b/src/data/dorado_info.py index cf6cc795..123a652c 100644 --- a/src/data/dorado_info.py +++ b/src/data/dorado_info.py @@ -2293,7 +2293,7 @@ "Overnight diamond pattern for CANON September 2017" " Bad blocks in hs2 data" " QC note: Best CTD is ctd2, ctd2 not great but better for salt although a couple screwey profiles in temp" - " - ctdToUse = ctd1 " + " - ctdToUse = ctd2 " ), } dorado_info["2017.347.00"] = { From b2ced0dbe92c3debee580b87d6a58f1baaa056cf Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 8 Oct 2025 17:08:12 -0700 Subject: [PATCH 002/121] Update link to DORADO_WORKFLOW.md. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d9a185e1..81417927 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ print out the usage information for each of the processing scripts: uv run src/data/process_i2map.py --help uv run src/data/process_dorado.py --help -See [WORKFLOW.md](WORKFLOW.md) for more details on the data processing workflow. +See [DORADO_WORKFLOW.md](DORADO_WORKFLOW.md) for more details on the data processing workflow. ### Jupyter Notebooks ### To run the Jupyter Notebooks, start Jupyter Lab at the command line with: From 942569283be4c5fe843019d7e2817c671142a752 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 10 Oct 2025 17:24:23 -0700 Subject: [PATCH 003/121] Add runner for nc42netcdfs.py using a known_hash. --- .vscode/launch.json | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index 71db7357..b45961dc 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -281,6 +281,17 @@ "program": "${workspaceFolder}/src/data/process_Dorado389.py", "console": "integratedTerminal", "args": ["-v", "1", "--noinput", "--no_cleanup", "--download", "--mission", "2011.256.02"] + }, + { + "name": "nc42netcdfs", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/nc42netcdfs.py", + "console": "integratedTerminal", + "args": ["-v", "1", "--log_file", + "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", + "--known_hash", + "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"] }, ] } From dc864cb0ba766e1086960bfec3c13730011e83c5 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 10 Oct 2025 17:24:49 -0700 Subject: [PATCH 004/121] Update for supporting LRAUV data processing. --- LRAUV_WORKFLOW.md | 6 +++--- notebooks/README.md | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md index cd387442..7295dbe4 100644 --- a/LRAUV_WORKFLOW.md +++ b/LRAUV_WORKFLOW.md @@ -2,10 +2,10 @@ The sequence of steps to process LRAUV data is as follows: -TODO: Update this to reflect actual LRAUV Data Workflow. It should - mirror the Dorado Workflow, especially the last few steps. +TODO: Update this to reflect actual LRAUV Data Workflow. It should + mimimic the Dorado Workflow, especially the last few steps. - ??? → calibrate.py → align.py → resample.py → archive.py → plot.py + extract.py → calibrate.py → align.py → resample.py → archive.py → plot.py Details of each step are described in the respective scripts and in the description of output netCDF files below. The output file directory structure diff --git a/notebooks/README.md b/notebooks/README.md index 01719f2b..ff95f775 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -1,5 +1,6 @@ The Notebooks in this directory are intended to be used to examine the data -generated by each of the steps described in the [workflow]("../DORADO_WORKFLOW.md"): +generated by each of the steps described in the [Dorado]]("../DORADO_WORKFLOW.md") +or [LRAUV]("../LRAUV_WORKFLOW.md") WORKFLOW documents: logs2netcdfs.py → calibrate.py → align.py → resample.py → archive.py → 1.x 2.x 3.x 4.x 5.x 6.x From 262d54364d3e57614c9f7be9a54bb235656f8968 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 10 Oct 2025 17:25:42 -0700 Subject: [PATCH 005/121] WIP: Initial commit --- src/data/nc42netcdfs.py | 297 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100755 src/data/nc42netcdfs.py diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py new file mode 100755 index 00000000..8b22b319 --- /dev/null +++ b/src/data/nc42netcdfs.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python +""" +Extract instrument/group data from LRAUV .nc4 files into individual NetCDF files. + +Makes the original data more accessible for analysis and visualization. +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" + +import argparse +import logging +import os +import sys +from pathlib import Path + +import netCDF4 +import pooch +import xarray as xr + +# Local directory that serves as the work area for log_files and netcdf files +BASE_LRAUV_WEB = "https://dods.mbari.org/data/lrauv/" +BASE_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve() +SUMMARY_SOURCE = "Original LRAUV data extracted from {}, group {}" +GROUPS = ["navigation", "ctd", "ecopuck"] # Your actual group names + +SCI_PARMS = { + "/": [ + { + "name": "concentration_of_colored_dissolved_organic_matter_in_sea_water", + "rename": "colored_dissolved_organic_matter", + } + ], + "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water", "rename": "oxygen"}], + "CTD_NeilBrown": [ + {"name": "sea_water_salinity", "rename": "salinity"}, + {"name": "sea_water_temperature", "rename": "temperature"}, + ], + "CTD_Seabird": [ + {"name": "sea_water_salinity", "rename": "salinity"}, + {"name": "sea_water_temperature", "rename": "temperature"}, + { + "name": "mass_concentration_of_oxygen_in_sea_water", + "rename": "mass_concentration_of_oxygen_in_sea_water", + }, + ], + "ISUS": [{"name": "mole_concentration_of_nitrate_in_sea_water", "rename": "nitrate"}], + "PAR_Licor": [{"name": "downwelling_photosynthetic_photon_flux_in_sea_water", "rename": "PAR"}], + "WetLabsBB2FL": [ + {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"}, + {"name": "OutputChl", "rename": "chl"}, + {"name": "Output470", "rename": "bbp470"}, + {"name": "Output650", "rename": "bbp650"}, + {"name": "VolumeScatCoeff117deg470nm", "rename": "volumescatcoeff117deg470nm"}, + {"name": "VolumeScatCoeff117deg650nm", "rename": "volumescatcoeff117deg650nm"}, + { + "name": "ParticulateBackscatteringCoeff470nm", + "rename": "particulatebackscatteringcoeff470nm", + }, + { + "name": "ParticulateBackscatteringCoeff650nm", + "rename": "particulatebackscatteringcoeff650nm", + }, + ], + "WetLabsSeaOWL_UV_A": [ + { + "name": "concentration_of_chromophoric_dissolved_organic_matter_in_sea_water", + "rename": "chromophoric_dissolved_organic_matter", + }, + {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"}, + {"name": "BackscatteringCoeff700nm", "rename": "BackscatteringCoeff700nm"}, + {"name": "VolumeScatCoeff117deg700nm", "rename": "VolumeScatCoeff117deg700nm"}, + { + "name": "mass_concentration_of_petroleum_hydrocarbons_in_sea_water", + "rename": "petroleum_hydrocarbons", + }, + ], + "WetLabsUBAT": [ + {"name": "average_bioluminescence", "rename": "average_bioluminescence"}, + {"name": "flow_rate", "rename": "ubat_flow_rate"}, + {"name": "digitized_raw_ad_counts", "rename": "digitized_raw_ad_counts"}, + ], +} + +ENG_PARMS = { + "BPC1": [ + {"name": "platform_battery_charge", "rename": "health_platform_battery_charge"}, + {"name": "platform_battery_voltage", "rename": "health_platform_average_voltage"}, + ], + "BuoyancyServo": [ + {"name": "platform_buoyancy_position", "rename": "control_inputs_buoyancy_position"} + ], + "DeadReckonUsingMultipleVelocitySources": [ + { + "name": "fix_residual_percent_distance_traveled", + "rename": "fix_residual_percent_distance_traveled_DeadReckonUsingMultipleVelocitySources", # noqa: E501 + }, + {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingMultipleVelocitySources"}, + {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingMultipleVelocitySources"}, + {"name": "depth", "rename": "pose_depth_DeadReckonUsingMultipleVelocitySources"}, + ], + "DeadReckonUsingSpeedCalculator": [ + { + "name": "fix_residual_percent_distance_traveled", + "rename": "fix_residual_percent_distance_traveled_DeadReckonUsingSpeedCalculator", + }, + {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingSpeedCalculator"}, + {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingSpeedCalculator"}, + {"name": "depth", "rename": "pose_depth_DeadReckonUsingSpeedCalculator"}, + ], + "ElevatorServo": [ + {"name": "platform_elevator_angle", "rename": "control_inputs_elevator_angle"} + ], + "MassServo": [{"name": "platform_mass_position", "rename": "control_inputs_mass_position"}], + "NAL9602": [ + {"name": "time_fix", "rename": "fix_time"}, + {"name": "latitude_fix", "rename": "fix_latitude"}, + {"name": "longitude_fix", "rename": "fix_longitude"}, + ], + "Onboard": [{"name": "platform_average_current", "rename": "health_platform_average_current"}], + "RudderServo": [{"name": "platform_rudder_angle", "rename": "control_inputs_rudder_angle"}], + "ThrusterServo": [ + { + "name": "platform_propeller_rotation_rate", + "rename": "control_inputs_propeller_rotation_rate", + } + ], + "CurrentEstimator": [ + { + "name": "current_direction_navigation_frame", + "rename": "current_direction_navigation_frame", + }, + {"name": "current_speed_navigation_frame", "rename": "current_speed_navigation_frame"}, + ], +} + +SCIENG_PARMS = {**SCI_PARMS, **ENG_PARMS} + + +class Extract: + """Extract instrument/group data from LRAUV .nc4 files into individual NetCDF files.""" + + logger = logging.getLogger(__name__) + _handler = logging.StreamHandler() + _formatter = logging.Formatter( + "%(levelname)s %(asctime)s %(filename)s " + "%(funcName)s():%(lineno)d [%(process)d] %(message)s", + ) + _handler.setFormatter(_formatter) + logger.addHandler(_handler) + _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + + def download_with_pooch(self, url, local_dir, known_hash=None): + """Download using pooch with caching and verification.""" + downloader = pooch.HTTPDownloader(timeout=(60, 300), progressbar=True) + return pooch.retrieve( + url=url, + known_hash=known_hash, # Optional but recommended for integrity + fname=Path(url).name, + path=local_dir, + downloader=downloader, + ) + + def get_groups_netcdf4(self, file_path): + """Get list of groups using netCDF4 library.""" + with netCDF4.Dataset(file_path, "r") as dataset: + return list(dataset.groups.keys()) + + def extract_groups_to_files(self, input_file, output_dir): + """Extract each group to a separate NetCDF file.""" + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + + all_groups = self.get_groups_netcdf4(input_file) + + for group_name, group_parms in SCIENG_PARMS.items(): + if group_name not in all_groups: + self.logger.warning("Group %s not found in %s", group_name, input_file) + continue + try: + ds = xr.open_dataset(input_file, group=group_name) + output_file = output_dir / f"{group_name}.nc" + # Output only the variables of interest + parms = [p["name"] for p in group_parms if "name" in p] + ds = ds[parms] + ds.to_netcdf(path=str(output_file), format="NETCDF4") + ds.close() + self.logger.info("Extracted %s to %s", group_name, output_file) + except (FileNotFoundError, OSError, ValueError): + self.logger.warning("Could not extract %s", group_name) + except KeyError: + self.logger.warning("Variable %s not found in group %s", parms, group_name) + + def process_command_line(self): + examples = "Examples:" + "\n\n" + examples += " Write to local missionnetcdfs direcory:\n" + examples += " " + sys.argv[0] + " --mission 2020.064.10\n" + examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n" + + parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, + description=__doc__, + epilog=examples, + ) + + parser.add_argument( + "--base_path", + action="store", + default=BASE_PATH, + help="Base directory for missionlogs and missionnetcdfs, default: auv_data", + ) + parser.add_argument( + "--title", + action="store", + help="A short description of the dataset", + ) + parser.add_argument( + "--summary", + action="store", + help="Additional information about the dataset", + ) + + parser.add_argument( + "--noinput", + action="store_true", + help="Execute without asking for a response, e.g. to not ask to re-download file", + ) + parser.add_argument( + "--clobber", + action="store_true", + help="Use with --noinput to overwrite existing downloaded log files", + ) + parser.add_argument( + "--noreprocess", + action="store_true", + help="Use with --noinput to not re-process existing downloaded log files", + ) + parser.add_argument( + "--start", + action="store", + help="Convert a range of missions wth start time in YYYYMMDD format", + ) + parser.add_argument( + "--end", + action="store", + help="Convert a range of missions wth end time in YYYYMMDD format", + ) + parser.add_argument( + "--auv_name", + action="store", + help="Name of the AUV and the directory name for its data, e.g.: tethys, ahi, pontus", + ) + parser.add_argument( + "--log_file", + action="store", + help=( + "Path to the log file for the mission, e.g.: " + "brizo/missionlogs/2025/20250903_20250909/" + "20250905T072042/202509050720_202509051653.nc4" + ), + ) + parser.add_argument( + "--known_hash", + action="store", + help=( + "Known hash for the file to be downloaded, e.g. " + "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85" + ), + ) + parser.add_argument( + "-v", + "--verbose", + type=int, + choices=range(3), + action="store", + default=0, + const=1, + nargs="?", + help="verbosity level: " + + ", ".join( + [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], + ), + ) + + self.args = parser.parse_args() + self.logger.setLevel(self._log_levels[self.args.verbose]) + self.commandline = " ".join(sys.argv) + + +if __name__ == "__main__": + extract = Extract() + extract.process_command_line() + url = os.path.join(BASE_LRAUV_WEB, extract.args.log_file) # noqa: PTH118 + output_dir = Path(BASE_PATH, Path(extract.args.log_file).parent) + extract.logger.info("Downloading %s", url) + input_file = extract.download_with_pooch(url, output_dir, extract.args.known_hash) + extract.extract_groups_to_files(input_file, output_dir) From 44e2558863a22e1f8f29857d32065a53075e800e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 14 Oct 2025 13:48:46 -0700 Subject: [PATCH 006/121] Add extract_groups_to_files_netcdf4() as xarray fails on garbled data. --- src/data/nc42netcdfs.py | 150 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 6 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 8b22b319..c436cd6b 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -29,7 +29,11 @@ { "name": "concentration_of_colored_dissolved_organic_matter_in_sea_water", "rename": "colored_dissolved_organic_matter", - } + }, + {"name": "longitude", "rename": "longitude"}, + {"name": "latitude", "rename": "latitude"}, + {"name": "depth", "rename": "depth"}, + {"name": "time", "rename": "time"}, ], "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water", "rename": "oxygen"}], "CTD_NeilBrown": [ @@ -150,6 +154,16 @@ class Extract: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + def show_variable_mapping(self): + """Show the variable mapping.""" + for group, parms in sorted(SCIENG_PARMS.items()): + print(f"Group: {group}") # noqa: T201 + for parm in parms: + name = parm.get("name", "N/A") + rename = parm.get("rename", "N/A") + print(f" {name} -> {rename}") # noqa: T201 + print() # noqa: T201 + def download_with_pooch(self, url, local_dir, known_hash=None): """Download using pooch with caching and verification.""" downloader = pooch.HTTPDownloader(timeout=(60, 300), progressbar=True) @@ -166,6 +180,111 @@ def get_groups_netcdf4(self, file_path): with netCDF4.Dataset(file_path, "r") as dataset: return list(dataset.groups.keys()) + def extract_groups_to_files_netcdf4(self, input_file, output_dir): # noqa: C901, PLR0912 + """Extract each group to a separate NetCDF file using netCDF4 library. + The xarray library fails reading the WetLabsBB2FL group from this file: + brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 + with garbled data for the serial variable (using ncdump): + serial = "$F!{<8D>\031@7\024[P]\001\030" ; + but netCDF4 can skip over it and read the rest of the variables.""" + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True, parents=True) + + with netCDF4.Dataset(input_file, "r") as src_dataset: + all_groups = list(src_dataset.groups.keys()) + + self.logger.info("Extracting data from %s", input_file) + + # TODO: Read variables from the "/" (root) group. + # We'll save them to a file named "Universals.nc" + + for group_name, group_parms in SCIENG_PARMS.items(): + if group_name not in all_groups: + self.logger.warning("Group %s not found in %s", group_name, input_file) + continue + + try: + self.logger.info(" Group %s", group_name) + src_group = src_dataset.groups[group_name] + + # Get variables to extract + parms = [p["name"] for p in group_parms if "name" in p] + self.logger.debug(" Variables to extract: %s", parms) + + # Check which variables actually exist in the group + available_vars = list(src_group.variables.keys()) + vars_to_extract = [var for var in parms if var in available_vars] + + if not vars_to_extract: + self.logger.warning("No requested variables found in group %s", group_name) + continue + + # Create output file + output_file = output_dir / f"{group_name}.nc" + + with netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: + # Copy global attributes from source group + for attr_name in src_group.ncattrs(): + dst_dataset.setncattr(attr_name, src_group.getncattr(attr_name)) + + # Copy dimensions that are used by the variables we want + dims_needed = set() + for var_name in vars_to_extract: + var = src_group.variables[var_name] + dims_needed.update(var.dimensions) + + for dim_name in dims_needed: + if dim_name in src_group.dimensions: + src_dim = src_group.dimensions[dim_name] + size = len(src_dim) if not src_dim.isunlimited() else None + dst_dataset.createDimension(dim_name, size) + + # Copy coordinate variables first (if they exist) + coord_vars = [] + for dim_name in dims_needed: + if dim_name in src_group.variables: + coord_vars.append(dim_name) # noqa: PERF401 + + # Copy coordinate variables + for var_name in coord_vars: + if var_name not in vars_to_extract: + self._copy_variable(src_group, dst_dataset, var_name) + + # Copy requested variables + for var_name in vars_to_extract: + self._copy_variable(src_group, dst_dataset, var_name) + + self.logger.info("Extracted %s to %s", group_name, output_file) + + except (FileNotFoundError, OSError, ValueError) as e: + self.logger.warning("Could not extract %s: %s", group_name, e) + except KeyError as e: + self.logger.warning("Variable %s not found in group %s", e, group_name) + + def _copy_variable(self, src_group, dst_dataset, var_name): + """Helper method to copy a variable from source to destination.""" + src_var = src_group.variables[var_name] + + # Create variable in destination + dst_var = dst_dataset.createVariable( + var_name, + src_var.dtype, + src_var.dimensions, + zlib=True, # Enable compression + complevel=6, + shuffle=True, + fletcher32=True, + ) + + # Copy data + dst_var[:] = src_var[:] + + # Copy variable attributes + for attr_name in src_var.ncattrs(): + dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + + self.logger.debug(" Copied variable: %s", var_name) + def extract_groups_to_files(self, input_file, output_dir): """Extract each group to a separate NetCDF file.""" output_dir = Path(output_dir) @@ -173,15 +292,18 @@ def extract_groups_to_files(self, input_file, output_dir): all_groups = self.get_groups_netcdf4(input_file) + self.logger.info("Extracting data from %s", input_file) for group_name, group_parms in SCIENG_PARMS.items(): if group_name not in all_groups: self.logger.warning("Group %s not found in %s", group_name, input_file) continue try: + self.logger.info(" Group %s", group_name) ds = xr.open_dataset(input_file, group=group_name) output_file = output_dir / f"{group_name}.nc" # Output only the variables of interest parms = [p["name"] for p in group_parms if "name" in p] + self.logger.debug(" Variables to extract: %s", parms) ds = ds[parms] ds.to_netcdf(path=str(output_file), format="NETCDF4") ds.close() @@ -190,6 +312,10 @@ def extract_groups_to_files(self, input_file, output_dir): self.logger.warning("Could not extract %s", group_name) except KeyError: self.logger.warning("Variable %s not found in group %s", parms, group_name) + except TypeError: + self.logger.warning( + "Type error processing group %s: %s", group_name, sys.exc_info() + ) def process_command_line(self): examples = "Examples:" + "\n\n" @@ -267,6 +393,13 @@ def process_command_line(self): "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85" ), ) + ( + parser.add_argument( + "--show_variable_mapping", + action="store_true", + help="Show the variable mapping: Group/variable_names -> their_renames", + ), + ) parser.add_argument( "-v", "--verbose", @@ -290,8 +423,13 @@ def process_command_line(self): if __name__ == "__main__": extract = Extract() extract.process_command_line() - url = os.path.join(BASE_LRAUV_WEB, extract.args.log_file) # noqa: PTH118 - output_dir = Path(BASE_PATH, Path(extract.args.log_file).parent) - extract.logger.info("Downloading %s", url) - input_file = extract.download_with_pooch(url, output_dir, extract.args.known_hash) - extract.extract_groups_to_files(input_file, output_dir) + if extract.args.show_variable_mapping: + extract.show_variable_mapping() + sys.exit(0) + else: + url = os.path.join(BASE_LRAUV_WEB, extract.args.log_file) # noqa: PTH118 + output_dir = Path(BASE_PATH, Path(extract.args.log_file).parent) + extract.logger.info("Downloading %s", url) + input_file = extract.download_with_pooch(url, output_dir, extract.args.known_hash) + # extract.extract_groups_to_files(input_file, output_dir) + extract.extract_groups_to_files_netcdf4(input_file, output_dir) From 6f07788d6e4f4df559162734af5658f73915caa0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 14 Oct 2025 14:46:31 -0700 Subject: [PATCH 007/121] Save variables from / into Universals.nc. --- src/data/nc42netcdfs.py | 63 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index c436cd6b..109f224c 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -180,7 +180,7 @@ def get_groups_netcdf4(self, file_path): with netCDF4.Dataset(file_path, "r") as dataset: return list(dataset.groups.keys()) - def extract_groups_to_files_netcdf4(self, input_file, output_dir): # noqa: C901, PLR0912 + def extract_groups_to_files_netcdf4(self, input_file, output_dir): # noqa: C901, PLR0912, PLR0915 """Extract each group to a separate NetCDF file using netCDF4 library. The xarray library fails reading the WetLabsBB2FL group from this file: brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 @@ -190,14 +190,69 @@ def extract_groups_to_files_netcdf4(self, input_file, output_dir): # noqa: C901 output_dir = Path(output_dir) output_dir.mkdir(exist_ok=True, parents=True) + # Read variables from the "/" (root) group and save them to a file named "Universals.nc" + with netCDF4.Dataset(input_file, "r") as src_dataset: + root_group = src_dataset + root_parms = SCIENG_PARMS.get("/", []) + if root_parms: + try: + self.logger.info("Extracting root group '/'") + # Get variables to extract + parms = [p["name"] for p in root_parms if "name" in p] + self.logger.debug(" Variables to extract: %s", parms) + + # Check which variables actually exist in the group + available_vars = list(root_group.variables.keys()) + vars_to_extract = [var for var in parms if var in available_vars] + + if vars_to_extract: + output_file = output_dir / "Universals.nc" + # Need to use NETCDF4 as we have multiple unlimited dimensions + with netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: + # Copy global attributes from source group + for attr_name in root_group.ncattrs(): + dst_dataset.setncattr(attr_name, root_group.getncattr(attr_name)) + + # Copy dimensions that are used by the variables we want + dims_needed = set() + for var_name in vars_to_extract: + var = root_group.variables[var_name] + dims_needed.update(var.dimensions) + + for dim_name in dims_needed: + if dim_name in root_group.dimensions: + src_dim = root_group.dimensions[dim_name] + size = len(src_dim) if not src_dim.isunlimited() else None + dst_dataset.createDimension(dim_name, size) + + # Copy coordinate variables first (if they exist) + coord_vars = [] + for dim_name in dims_needed: + if dim_name in root_group.variables: + coord_vars.append(dim_name) # noqa: PERF401 + + # Copy coordinate variables + for var_name in coord_vars: + if var_name not in vars_to_extract: + self._copy_variable(root_group, dst_dataset, var_name) + + # Copy requested variables + for var_name in vars_to_extract: + self._copy_variable(root_group, dst_dataset, var_name) + + self.logger.info("Extracted root group '/' to %s", output_file) + else: + self.logger.warning("No requested variables found in root group '/'") + except (FileNotFoundError, OSError, ValueError) as e: + self.logger.warning("Could not extract root group '/': %s", e) + except KeyError as e: + self.logger.warning("Variable %s not found in root group '/'", e) + with netCDF4.Dataset(input_file, "r") as src_dataset: all_groups = list(src_dataset.groups.keys()) self.logger.info("Extracting data from %s", input_file) - # TODO: Read variables from the "/" (root) group. - # We'll save them to a file named "Universals.nc" - for group_name, group_parms in SCIENG_PARMS.items(): if group_name not in all_groups: self.logger.warning("Group %s not found in %s", group_name, input_file) From 649a842e3ab9022cebea2149d1173c86170ba586 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 14 Oct 2025 14:47:08 -0700 Subject: [PATCH 008/121] Add test for bad data to "1.3 - nc42netcdfs". --- .vscode/launch.json | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b45961dc..88e8ae2b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -39,13 +39,24 @@ "args": ["-v", "1", "-d", "0", "-i", "data/auv_data/dorado/missionlogs/2009.055.05/lopc.bin", "-n", "data/auv_data/dorado/missionnetcdfs/2009.055.05/lopc.nc", "-f", "--LargeCopepod_AIcrit", "0.3"] }, { - "name": "1.1 - correct_log_times.py --mission 2017.284.00 --auv_name Dorado389", + "name": "1.2 - correct_log_times.py --mission 2017.284.00 --auv_name Dorado389", "type": "debugpy", "request": "launch", "program": "${workspaceFolder}/src/data/correct_log_times.py", "console": "integratedTerminal", "args": ["--auv_name", "Dorado389", "--mission", "2017.284.00", "-v", "2"] }, + { + "name": "1.3 - nc42netcdfs", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/nc42netcdfs.py", + "console": "integratedTerminal", + // A small log_file that has a reasonable amount of data, and known_hash to verify download + //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", "--known_hash", "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"] + // Has bad latitude and longitude values + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + }, { "name": "2.0 - calibrate.py", "type": "debugpy", @@ -282,16 +293,6 @@ "console": "integratedTerminal", "args": ["-v", "1", "--noinput", "--no_cleanup", "--download", "--mission", "2011.256.02"] }, - { - "name": "nc42netcdfs", - "type": "debugpy", - "request": "launch", - "program": "${workspaceFolder}/src/data/nc42netcdfs.py", - "console": "integratedTerminal", - "args": ["-v", "1", "--log_file", - "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", - "--known_hash", - "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"] - }, + ] } From 2340c5c9c42b0dc74dbb00ccf1f6c7a614632d47 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 14 Oct 2025 15:55:15 -0700 Subject: [PATCH 009/121] WIP: Working out lrauv data processing workflow. --- DORADO_WORKFLOW.md | 2 +- LRAUV_WORKFLOW.md | 41 ++++++++++++++++++++++------------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/DORADO_WORKFLOW.md b/DORADO_WORKFLOW.md index 9640f503..1cd12da2 100644 --- a/DORADO_WORKFLOW.md +++ b/DORADO_WORKFLOW.md @@ -70,6 +70,6 @@ on the local file system's work directory is as follows: archive.py Copy the netCDF files to the archive directory. The archive directory - is initally in the AUVCTD share on atlas which is shared with the + is initially in the AUVCTD share on atlas which is shared with the data from the Dorado Gulper vehicle, but can also be on the M3 share on thalassa near the original log data. diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md index 7295dbe4..98307671 100644 --- a/LRAUV_WORKFLOW.md +++ b/LRAUV_WORKFLOW.md @@ -2,35 +2,38 @@ The sequence of steps to process LRAUV data is as follows: -TODO: Update this to reflect actual LRAUV Data Workflow. It should - mimimic the Dorado Workflow, especially the last few steps. - - extract.py → calibrate.py → align.py → resample.py → archive.py → plot.py + nc42netcdfs.py → combine.py → align.py → resample.py → archive.py → plot.py Details of each step are described in the respective scripts and in the description of output netCDF files below. The output file directory structure on the local file system's work directory is as follows: ├── data - │ ├── auv_data - │ │ ├── <- e.g.: ahi, brizo, pontus, tethys, ... - │ │ │ ├── missionnetcdfs <- netCDF files - │ │ │ │ ├── <- e.g.: 2025/20250107_20250123/20250107T213313/202501072133_202501080049.nc4 - │ │ │ │ │ ├── <- .nc files for each instrument created - | | | | | | by ... - │ │ │ │ │ ├── <- .nc file with calibrated data created - | | | | | | by calibrate.py - │ │ │ │ │ ├── <- .nc file with all measurement variables + │ ├── lrauv_data + │ │ ├── <- e.g.: ahi, brizo, pontus, tethys, ... + │ │ │ ├── missionlogs/year/dlist_dir + │ │ │ │ ├── <- e.g.: ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4 + │ │ │ │ │ ├── <- .nc4 file containing original data + │ │ │ │ │ ├── <- .nc files, one for each group from the .nc4 file + | | | | | | data identical to original in NETCDF4 format + │ │ │ │ │ ├── <_cal> <- A single NETCDF3 .nc file containing all the + | | | | | | varibles from the .nc files along with nudged + | | | | | | latitudes and longitudes - created by combine.py + │ │ │ │ │ ├── <_align> <- .nc file with all measurement variables | | | | | | having associated coordinate variables | | | | | | at original instrument sampling rate - | | | | | | created by align.py - │ │ │ │ │ ├── <- .nc file with all measurement variables - resampled to a common time grid at n - Second intervals - created by resample.py + │ │ │ │ │ ├── <_nS> <- .nc file with all measurement variables + resampled to a common time grid at n + Second intervals - created by resample.py - ??? + nc42netcdfs.py + Extract the groups and the variables we want from the groups into + individual .nc files. These data are saved using NETCDF4 format as + there are many unlimited dimensions that are not allowed in NETCDF3. + The data in the .nc files are identical to what is in the .nc4 groups. - calibrate.py + combine.py Apply calibration coefficients to the original data. The calibrated data are written to a new netCDF file in the missionnetcdfs/ directory ending with _cal.nc. This step also includes nudging the @@ -60,6 +63,6 @@ on the local file system's work directory is as follows: archive.py Copy the netCDF files to the archive directory. The archive directory - is initally in the AUVCTD share on atlas which is shared with the + is initially in the AUVCTD share on atlas which is shared with the data from the Dorado Gulper vehicle, but can also be on the M3 share on thalassa near the original log data. From bde0115a9aea2d16b836dd4593c77f774cf6c5be Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 14 Oct 2025 15:58:04 -0700 Subject: [PATCH 010/121] WIP: Copy of calibrate.py - rework to combine lrauv data. --- src/data/combine.py | 3623 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3623 insertions(+) create mode 100755 src/data/combine.py diff --git a/src/data/combine.py b/src/data/combine.py new file mode 100755 index 00000000..704597e4 --- /dev/null +++ b/src/data/combine.py @@ -0,0 +1,3623 @@ +#!/usr/bin/env python +""" +Calibrate original data and produce NetCDF file for mission + +Read original data from netCDF files created by logs2netcdfs.py, apply +calibration information in .cfg and .xml files associated with the +original .log files and write out a single netCDF file with the important +variables at original sampling intervals. Geometric alignment and plumbing lag +corrections are also done during this step. The file will contain combined +variables (the combined_nc member variable) and be analogous to the original +netCDF4 files produced by MBARI's LRAUVs. Rather than using groups in netCDF-4 +the data will be written in classic netCDF-CF with a naming syntax that mimics +the LRAUV group naming convention with the coordinates for each sensor: +``` + _ + _<..........> + _ + _time + _depth + _latitude + _longitude +``` +Note: The name "sensor" is used here, but it's really more aligned +with the concept of "instrument" in SSDS parlance. +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2020, Monterey Bay Aquarium Research Institute" + +import argparse +import logging +import os +import shlex +import shutil +import subprocess +import sys +import time +from argparse import RawTextHelpFormatter +from collections import OrderedDict +from datetime import UTC, datetime +from pathlib import Path +from socket import gethostname +from typing import NamedTuple + +import cf_xarray # Needed for the .cf accessor # noqa: F401 +import defusedxml.ElementTree as ET # noqa: N817 +import matplotlib.pyplot as plt +import numpy as np +import xarray as xr +from scipy.interpolate import interp1d +from seawater import eos80 + +try: + import cartopy.crs as ccrs # type: ignore # noqa: PGH003 + from shapely.geometry import LineString # type: ignore # noqa: PGH003 +except ModuleNotFoundError: + # cartopy is not installed, will not be able to plot maps + pass + +import pandas as pd +import pyproj +from AUV import monotonic_increasing_time_indices +from hs2_proc import compute_backscatter, hs2_calc_bb, hs2_read_cal_file +from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ, AUV_NetCDF +from matplotlib import patches +from scipy import signal + +AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay + + +class Range(NamedTuple): + min: float + max: float + + +# Using lower case vehicle names, modify in _define_sensor_info() for changes over time +# Used to reduce ERROR & WARNING log messages for expected missing sensor data +EXPECTED_SENSORS = { + "dorado": [ + "navigation", + "gps", + "depth", + "ecopuck", + "hs2", + "ctd1", + "ctd2", + "isus", + "biolume", + "lopc", + "tailcone", + ], + "i2map": [ + "navigation", + "gps", + "depth", + "seabird25p", + "transmissometer", + "tailcone", + ], +} +# Used in test fixture in conftetst.py +EXPECTED_SENSORS["Dorado389"] = EXPECTED_SENSORS["dorado"] + + +def align_geom(sensor_offset, pitches): + """Use x & y sensor_offset values in meters from sensor_info and + pitch in degrees to compute and return actual depths of the sensor + based on the geometry relative to the vehicle's depth sensor. + """ + # See https://en.wikipedia.org/wiki/Rotation_matrix + # + # * instrument location with pitch applied + # / | + # / | + # / | + # / | + # / | + # / | + # / | + # / | + # / | + # / + # / + # / y + # / _ + # / o + # / f + # / f + # / * instrument location + # / | + # / \ | | + # / \ | y + # / pitch (theta) | | + # / \ | | + # --------------------x------------------+ --> nose + # + # [ cos(pitch) -sin(pitch) ] [x] [x'] + # X = + # [ sin(pitch) cos(pitch) ] [y] [y'] + offsets = [] + for pitch in pitches: + theta = pitch * np.pi / 180.0 + R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) + x_off, y_off = np.matmul(R, sensor_offset) + offsets.append(y_off) + + return offsets + + +class Coeffs: + pass + + +# History of seabird25p.cfg file changes: + +# [mccann@elvis i2MAP]$ pwd +# /mbari/M3/master/i2MAP +# [mccann@elvis i2MAP]$ ls -l */*/*/*/seabird25p.cfg +# -rwx------. 1 519 games 3050 Sep 20 2016 2017/01/20170117/2017.017.00/seabird25p.cfg +# -rwx------. 1 519 games 3050 Sep 20 2016 2017/01/20170117/2017.017.01/seabird25p.cfg +# -rwx------. 1 lonny nobody 3050 Sep 20 2016 2017/04/20170407/2017.097.00/seabird25p.cfg +# -rwx------. 1 robs games 3050 Sep 20 2016 2017/05/20170508/2017.128.00/seabird25p.cfg +# -rwx------. 1 robs games 3109 May 11 2017 2017/05/20170512/2017.132.00/seabird25p.cfg +# -rwx------. 1 robs games 3109 May 11 2017 2017/06/20170622/2017.173.00/seabird25p.cfg +# -rwx------. 1 519 games 3109 May 11 2017 2017/08/20170824/2017.236.00/seabird25p.cfg +# -rwx------. 1 519 games 3109 May 11 2017 2017/09/20170914/2017.257.00/seabird25p.cfg +# -rwx------. 1 etrauschke games 3109 Jan 29 2018 2018/01/20180125/2018.025.00/seabird25p.cfg +# -rwx------. 1 henthorn games 3109 Feb 15 2018 2018/02/20180214/2018.045.03/seabird25p.cfg +# -rwx------. 1 lonny games 3667 Mar 2 2018 2018/03/20180306/2018.065.02/seabird25p.cfg +# -rwx------. 1 lonny games 3667 Mar 2 2018 2018/04/20180404/2018.094.00/seabird25p.cfg +# -rwx------. 1 lonny games 3667 Mar 2 2018 2018/06/20180618/2018.169.01/seabird25p.cfg +# -rwx------. 1 lonny games 3667 Jul 19 2018 2018/07/20180718/2018.199.00/seabird25p.cfg +# -rwx------. 1 jana games 3667 Aug 30 2018 2018/08/20180829/2018.241.01/seabird25p.cfg +# -rwx------. 1 lonny games 3667 Oct 25 2018 2018/10/20181023/2018.296.00/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181203/2018.337.00/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.01/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.05/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.06/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.07/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.08/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.09/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.10/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.11/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.12/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.13/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181214/2018.348.00/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181214/2018.348.01/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181214/2018.348.02/seabird25p.cfg +# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181214/2018.348.03/seabird25p.cfg +# -rwx------. 1 lonny games 3667 Mar 2 2018 2019/01/20190107/2019.007.07/seabird25p.cfg +# -rwx------. 1 lonny games 3667 Mar 2 2018 2019/01/20190107/2019.007.09/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190204/2019.035.10/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.00/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.01/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.02/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.03/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.04/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.05/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.06/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.07/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.08/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190228/2019.059.01/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/04/20190408/2019.098.01/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/06/20190606/2019.157.00/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/06/20190606/2019.157.01/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/06/20190606/2019.157.02/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/07/20190709/2019.190.00/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/09/20190916/2019.259.01/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/10/20191007/2019.280.02/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/10/20191021/2019.294.00/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/11/20191107/2019.311.00/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/12/20191210/2019.344.06/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/01/20200108/2020.008.00/seabird25p.cfg +# -rwx------. 1 mbassett nobody 3667 Mar 2 2018 2020/02/20200210/2020.041.02/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/02/20200224/2020.055.01/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/06/20200629/2020.181.02/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/07/20200728/2020.210.03/seabird25p.cfg +# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/08/20200811/2020.224.04/seabird25p.cfg +# -rwx------. 1 lonny nobody 3899 Sep 11 2020 2020/09/20200914/2020.258.01/seabird25p.cfg +# -rwx------. 1 lonny nobody 3919 Sep 21 2020 2020/09/20200922/2020.266.01/seabird25p.cfg +# -rwxr-xr-x. 1 brian games 4267 Mar 1 2021 2021/03/20210303/2021.062.01/seabird25p.cfg +# -rwxr-xr-x. 1 robs games 4267 Mar 1 2021 2021/03/20210330/2021.089.00/seabird25p.cfg +# -rwxr-xr-x. 1 robs games 4267 Mar 1 2021 2021/05/20210512/2021.132.01/seabird25p.cfg +# -rwxr-xr-x. 1 robs games 4267 Mar 1 2021 2021/06/20210624/2021.175.03/seabird25p.cfg +# -rwx------. 1 lonny nobody 4267 Mar 1 2021 2021/09/20210921/2021.264.03/seabird25p.cfg +# -rwx------. 1 lonny nobody 4267 Mar 1 2021 2021/10/20211018/2021.291.00/seabird25p.cfg +# -rwx------. 1 lonny nobody 4267 Mar 1 2021 2021/11/20211103/2021.307.02/seabird25p.cfg +# -rwx------. 1 lonny nobody 4267 Mar 1 2021 2022/03/20220302/2022.061.01/seabird25p.cfg + + +def _calibrated_temp_from_frequency(cf, nc): + # From processCTD.m: + # TC = 1./(t_a + t_b*(log(t_f0./temp_frequency)) + t_c*((log(t_f0./temp_frequency)).^2) + t_d*((log(t_f0./temp_frequency)).^3)) - 273.15; # noqa: E501 + # From Seabird25p.cc: + # if (*_t_coefs == 'A') { + # f = ::log(T_F0/f); + # T = 1/(T_A + (T_B + (T_C + T_D*f)*f)*f) - 273.15; + # } + # else if (*_t_coefs == 'G') { + # f = ::log(T_GF0/f); + # T = 1/(T_G + (T_H + (T_I + T_J*f)*f)*f) - 273.15; + # } + K2C = 273.15 + if cf.t_coefs == "A": + calibrated_temp = ( + 1.0 + / ( + cf.t_a + + cf.t_b * np.log(cf.t_f0 / nc["temp_frequency"].to_numpy()) + + cf.t_c * np.power(np.log(cf.t_f0 / nc["temp_frequency"]), 2) + + cf.t_d * np.power(np.log(cf.t_f0 / nc["temp_frequency"]), 3) + ) + - K2C + ) + elif cf.t_coefs == "G": + calibrated_temp = ( + 1.0 + / ( + cf.t_g + + cf.t_h * np.log(cf.t_gf0 / nc["temp_frequency"].to_numpy()) + + cf.t_i * np.power(np.log(cf.t_gf0 / nc["temp_frequency"]), 2) + + cf.t_j * np.power(np.log(cf.t_gf0 / nc["temp_frequency"]), 3) + ) + - K2C + ) + else: + error_message = f"Unknown t_coefs: {cf.t_coefs}" + raise ValueError(error_message) + + return calibrated_temp + + +def _calibrated_sal_from_cond_frequency(args, combined_nc, logger, cf, nc, temp): # noqa: PLR0913 + # Comments carried over from doradosdp's processCTD.m: + # Note that recalculation of conductivity and correction for thermal mass + # are possible, however, their magnitude results in salinity differences + # of less than 10^-4. + # In other regions where these corrections are more significant, the + # corrections can be turned on. + # conductivity at S=35 psu , T=15 C [ITPS 68] and P=0 db) ==> 42.914 + sw_c3515 = 42.914 + eps = np.spacing(1) + + f_interp = interp1d( + combined_nc["depth_time"].to_numpy().tolist(), + combined_nc["depth_filtpres"].to_numpy(), + fill_value=( + combined_nc["depth_filtpres"].to_numpy()[0], + combined_nc["depth_filtpres"].to_numpy()[-1], + ), + bounds_error=False, + ) + p1 = f_interp(nc["time"].to_numpy().tolist()) + if args.plot: + pbeg = 0 + pend = len(combined_nc["depth_time"]) + if args.plot.startswith("first"): + pend = int(args.plot.split("first")[1]) + plt.figure(figsize=(18, 6)) + plt.plot( + combined_nc["depth_time"][pbeg:pend], + combined_nc["depth_filtpres"][pbeg:pend], + ":o", + nc["time"][pbeg:pend], + p1[pbeg:pend], + "o", + ) + plt.legend(("Pressure from parosci", "Interpolated to ctd time")) + title = "Comparing Interpolation of Pressure to CTD Time" + title += f" - First {pend} Points from each series" + plt.title(title) + plt.grid() + logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) + plt.show() + + # Conductivity Calculation + # cfreq=cond_frequency/1000; + # c1 = (c_a*(cfreq.^c_m)+c_b*(cfreq.^2)+c_c+c_d*TC)./(10*(1+eps*p1)); + # + # seabird25p.cc: https://bitbucket.org/mbari/dorado-auv-qnx/src/master/auv/altex/onboard/seabird25p/Seabird25p.cc + # if(*_c_coefs == 'A') { + # C = (C_A*pow(f,C_M) + C_B*f*f +C_C +C_D*t)/(10*(1+EPS*p)); + # } + # else if(*_c_coefs == 'G') { + # C = (C_G +(C_H +(C_I + C_J*f)*f)*f*f) / (10.*(1+C_TCOR*t+C_PCOR*p)) ; + # } + # else { + # Syslog::write("Seabird25p::calculate_Cond(): no c_coefs set selected.\n"); + # C=0; + # } + cfreq = nc["cond_frequency"].to_numpy() / 1000.0 + + if cf.c_coefs == "A": + calibrated_conductivity = ( + cf.c_a * np.power(cfreq, cf.c_m) + + cf.c_b * np.power(cfreq, 2) + + cf.c_c + + cf.c_d * temp.to_numpy() + ) / (10 * (1 + eps * p1)) + elif cf.c_coefs == "G": + # C = (C_G +(C_H +(C_I + C_J*f)*f)*f*f) / (10.*(1+C_TCOR*t+C_PCOR*p)) ; + calibrated_conductivity = ( + cf.c_g + (cf.c_h + (cf.c_i + cf.c_j * cfreq) * cfreq) * np.power(cfreq, 2) + ) / (10 * (1 + cf.c_tcor * temp.to_numpy() + cf.c_pcor * p1)) + else: + error_message = f"Unknown c_coefs: {cf.c_coefs}" + raise ValueError(error_message) + + # % Calculate Salinty + # cratio = c1*10/sw_c3515; % sw_C is conductivity value at 35,15,0 + # CTD.salinity = sw_salt(cratio,CTD.temperature,p1); % (psu) + # seabird25p.cc: https://bitbucket.org/mbari/dorado-auv-qnx/src/master/auv/altex/onboard/seabird25p/Seabird25p.cc + # // + # // rsm 28 Mar 07: Compute salinity from conductivity, temperature and + # // presssure: + # cndr = 10.*read_cond/sw_c3515(); + # salinity = sw_salt( cndr, read_temp, depthSensor_pres); + cratio = calibrated_conductivity * 10 / sw_c3515 + calibrated_salinity = eos80.salt(cratio, temp, p1) + + return calibrated_conductivity, calibrated_salinity + + +def _oxsat(temperature, salinity): + # + # ---------------------------------- + # Oxygen saturation: f(T,S); ml/l + # ---------------------------------- + # TK = 273.15+T; % degrees Kelvin + # A1 = -173.4292; A2 = 249.6339; A3 = 143.3483; A4 = -21.8492; + # B1 = -0.033096; B2 = 0.014259; B3 = -0.00170; + # OXSAT = exp(A1 + A2*(100./TK) + A3*log(TK/100) + A4*(TK/100) + [S .* (B1 + B2*(TK/100) + (B3*(TK/100).*(TK/100)))] ); # noqa: E501 + tk = 273.15 + temperature # degrees Kelvin + a1 = -173.4292 + a2 = 249.6339 + a3 = 143.3483 + a4 = -21.8492 + b1 = -0.033096 + b2 = 0.014259 + b3 = -0.00170 + return np.exp( + a1 + + a2 * (100 / tk) + + a3 * np.log(tk / 100) + + a4 * (tk / 100) + + np.multiply( + salinity, + b1 + b2 * (tk / 100) + np.multiply(b3 * (tk / 100), (tk / 100)), + ), + ) + + +def _calibrated_O2_from_volts( # noqa: PLR0913 + combined_nc: np.array, + cf: Coeffs, + nc: xr.Dataset, + var_name: str, + temperature: xr.DataArray, + salinity: xr.DataArray, +) -> tuple[np.array, np.array, str, str]: + # Contents of doradosdp's calc_O2_SBE43.m: + # ---------------------------------------- + # function [O2] = calc_O2_SBE43(O2V,T,S,P,O2cal,time,units); + # To calculate Oxygen from sbe voltage + # Reference: W.B. Owens and R.C. Millard, 1985. A new algorithm for CTD oxygen + # calibration, J. Phys. Oceanogr. 15:621-631. + # Also, described in SeaBird application note. + # pltit = 'n'; + # % disp([' Pressure should be in dB']); + f_interp = interp1d( + combined_nc["depth_time"].to_numpy().tolist(), + combined_nc["depth_filtpres"].to_numpy(), + fill_value=( + combined_nc["depth_filtpres"].to_numpy()[0], + combined_nc["depth_filtpres"].to_numpy()[-1], + ), + bounds_error=False, + ) + pressure = f_interp(nc["time"].to_numpy().tolist()) + + # + # ---------------------------------- + # Oxygen voltage + # ---------------------------------- + # % disp([' Minimum of oxygen voltage ' num2str(min(O2V)) ' V']); + # % disp([' Maximum of oxygen voltage ' num2str(max(O2V)) ' V']); + # % disp([' Mean of oxygen voltage ' num2str(mean(O2V)) ' V']); + # docdt = [NaN;[diff(O2V)./diff(time)]]; % slope of oxygen current (uA/sec); + docdt = np.append( + np.nan, + np.divide( + np.diff(nc[var_name]), + np.diff(nc["time"].astype(np.int64).to_numpy() / 1e9), + ), + ) + + oxsat = _oxsat(temperature, salinity) + + # Owens-Millard equation + # + # ---------------------------------- + # Oxygen concentration (mL/L) + # ---------------------------------- + # Constants + # tau=0; + # + # O2 = [O2cal.SOc * ((O2V+O2cal.offset)+(tau*docdt)) + O2cal.BOc * exp(-0.03*T)].*exp(O2cal.Tcor*T + O2cal.Pcor*P).*OXSAT; # noqa: E501 + tau = 0.0 + try: + o2_mll = np.multiply( + cf.SOc * ((nc[var_name].to_numpy() + cf.Voff) + (tau * docdt)) + + cf.BOc * np.exp(-0.03 * temperature.to_numpy()), + np.multiply( + np.exp(cf.TCor * temperature.to_numpy() + cf.PCor * pressure), + oxsat.to_numpy(), + ), + ) + except AttributeError as e: + error_message = f"Cannot calculate o2_mll: {e}" + raise ValueError(error_message) from e + + # + # if strcmp(units,'umolkg')==1 + # ---------------------------------- + # Convert to umol/kg + # ---------------------------------- + # SeaBird equations are for ml/l computations + # Can convert OXSAT at atmospheric pressure to mg/l by 1.4276 + # Convert dissolved O2 to mg/l using density of oxygen = 1.4276 kg/m^3 + # dens=sw_dens(S,T,P); + # O2 = (O2 * 1.4276) .* (1e6./(dens*32)); + dens = eos80.dens(salinity.to_numpy(), temperature.to_numpy(), pressure) + o2_umolkg = np.multiply(o2_mll * 1.4276, (1.0e6 / (dens * 32))) + + return o2_mll, o2_umolkg + + +def _calibrated_O2_from_volts_SBE43( # noqa: PLR0913 + combined_nc: np.array, + cf: Coeffs, + nc: xr.Dataset, + var_name: str, + temperature: xr.DataArray, + salinity: xr.DataArray, +) -> tuple[np.array, np.array]: + # Written to handle the seabird25p O2 sensor from the i2map vehicle - October 2023 + # - Uses Equation 1 from the SeaBird 25p manual + # + # See for example: "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files/SBE-43/2510/2014_sep/SBE 43 O2510 09Sep14.pdf" # noqa: E501 + # Soc = oxygen calibration coefficient (ml/l/V) + # V = measured voltage (V) + # Voffset = voltage offset (V) + # A = temperature compensation coefficient (1/°C) + # B = temperature compensation coefficient (1/°C) + # C = temperature compensation coefficient (1/°C) + # T = temperature (°C, ITS-90) + # E = pressure compensation coefficient (1/dbar) + # K = temperature (°K) + # P = pressure (dbar) + + f_interp = interp1d( + combined_nc["depth_time"].to_numpy().tolist(), + combined_nc["depth_filtpres"].to_numpy(), + fill_value=( + combined_nc["depth_filtpres"].to_numpy()[0], + combined_nc["depth_filtpres"].to_numpy()[-1], + ), + bounds_error=False, + ) + pressure = f_interp(nc["time"].to_numpy().tolist()) + + # Oxsol(T,S) = oxygen saturation (ml/l); P = pressure (dbar) + oxsat = _oxsat(temperature, salinity) + + # Oxygen concentration (ml/l) = Soc * (V + Voffset) * (1.0 + A * T + B * T**2 + C * T**3 ) * Oxsol(T,S) * exp(E * P / K) # noqa: E501 + o2_mll = np.multiply( + cf.Soc * (nc[var_name].to_numpy() + cf.offset), + np.multiply( + ( + 1.0 + + cf.A * temperature.to_numpy() + + cf.B * np.power(temperature.to_numpy(), 2) + + cf.C * np.power(temperature.to_numpy(), 3) + ), + np.multiply( + oxsat.to_numpy(), + np.exp(np.divide(cf.E * pressure, (273.15 + temperature.to_numpy()))), + ), + ), + ) + + # if strcmp(units,'umolkg')==1 + # ---------------------------------- + # Convert to umol/kg + # ---------------------------------- + # SeaBird equations are for ml/l computations + # Can convert OXSAT at atmospheric pressure to mg/l by 1.4276 + # Convert dissolved O2 to mg/l using density of oxygen = 1.4276 kg/m^3 + # dens=sw_dens(S,T,P); + # O2 = (O2 * 1.4276) .* (1e6./(dens*32)); + dens = eos80.dens(salinity.to_numpy(), temperature.to_numpy(), pressure) + o2_umolkg = np.multiply(o2_mll * 1.4276, (1.0e6 / (dens * 32))) + + return o2_mll, o2_umolkg + + +def _beam_transmittance_from_volts(combined_nc, nc) -> tuple[float, float]: + # ---------------------------------------------- + # From: robs + # Subject: Fwd: Merging i2MAP nav and CTD with VARS + # Date: November 14, 2022 at 10:53:04 AM PST + # To: Mike McCann + # + # Oops, I'm sorry! Apparently I sent this to myself (ah, Monday)…. + # + # Begin forwarded message: + # + # From: robs + # Subject: Re: Merging i2MAP nav and CTD with VARS + # Date: November 14, 2022 at 8:34:22 AM PST + # To: Rob Sherlock + # + # Here is the Cal-sheet for the Transmissometer if you need it: + # + # C-Star Calibration + # Date 11.25.14 + # S/N# CST-1694DR + # Pathlength 25 cm + # Analog Output Digital Output + # Vd 0.006 V 0 counts + # Vair 4.830 V 15867 counts + # Vref 4.701 V 15443 counts + + # Relationship of transmittance (Tr) to beam attenuation coefficient (c), + # and pathlength (x, in meters): Tr = exp(-c*x) + + # To determine beam transmittance: Tr = (Vsig - Vd) / (Vref - Vd) + # To determine beam attenuation coefficient: c = -1/x * ln (Tr) + + # Vd Meter output with the beam blocked. This is the offset. + # Vair Meter output in air with a clear beam path. + # Vref Meter output with clean water in the path. + # Temperature of calibration water: temperature of clean water used to obtain Vref. + # Ambient temperature: meter temperature in air during the calibration. + # Vsig Measured signal output of meter. + # + + # Hard-coded values from the calibration sheet, but when they are available + # in the .cfg file, they should be read from cf instead. + Vd = 0.006 + Vref = 4.701 + # + # Return beam transmittance (Tr) and beam attenuation coefficient (c) + Tr = (nc["transmissometer"] - Vd) / (Vref - Vd) + with np.errstate(invalid="ignore"): + c = -1 / 0.25 * np.log(Tr) + + return Tr, c + + +class SensorInfo: + pass + + +class Calibrate_NetCDF: + logger = logging.getLogger(__name__) + _handler = logging.StreamHandler() + _handler.setFormatter(AUV_NetCDF._formatter) + logger.addHandler(_handler) + _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + + def global_metadata(self): + """Use instance variables to return a dictionary of + metadata specific for the data that are written + """ + from datetime import datetime + + iso_now = datetime.now(tz=UTC).isoformat() + "Z" + + metadata = {} + metadata["netcdf_version"] = "4" + metadata["Conventions"] = "CF-1.6" + metadata["date_created"] = iso_now + metadata["date_update"] = iso_now + metadata["date_modified"] = iso_now + metadata["featureType"] = "trajectory" + try: + metadata["time_coverage_start"] = str( + self.combined_nc["depth_time"].to_pandas().iloc[0].isoformat(), + ) + except KeyError: + error_message = "No depth_time variable in combined_nc" + raise EOFError(error_message) from None + metadata["time_coverage_end"] = str( + self.combined_nc["depth_time"].to_pandas().iloc[-1].isoformat(), + ) + metadata["distribution_statement"] = "Any use requires prior approval from MBARI" + metadata["license"] = metadata["distribution_statement"] + metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." + metadata["history"] = f"Created by {self.commandline} on {iso_now}" + + metadata["title"] = ( + f"Calibrated AUV sensor data from {self.args.auv_name} mission {self.args.mission}" + ) + metadata["summary"] = ( + "Observational oceanographic data obtained from an Autonomous" + " Underwater Vehicle mission with measurements at" + " original sampling intervals. The data have been calibrated" + " by MBARI's auv-python software." + ) + if self.summary_fields: + # Should be just one item in set, but just in case join them + metadata["summary"] += " " + ". ".join(self.summary_fields) + metadata["comment"] = ( + f"MBARI Dorado-class AUV data produced from original data" + f" with execution of '{self.commandline}'' at {iso_now} on" + f" host {gethostname()}. Software available at" + f" 'https://github.com/mbari-org/auv-python'" + ) + + return metadata + + def _get_file(self, download_url, local_filename, session): + with session.get(download_url, timeout=60) as resp: + HTTP_OK = 200 + if resp.status != HTTP_OK: + self.logger.warning( + "Cannot read %s, status = %s", + download_url, + resp.status, + ) + else: + self.logger.info("Started download to %s...", local_filename) + with Path(local_filename).open("wb") as handle: + for chunk in resp.content.iter_chunked(1024): + handle.write(chunk) + if self.args.verbose > 1: + self.logger.info("%s(done)", Path(local_filename).name) + + def _define_sensor_info(self, start_datetime): + # Using lower case vehicle names, modify below for changes over time + # Used to reduce ERROR log messages for missing sensor data + self.expected_sensors = { + "dorado": [ + "navigation", + "gps", + "depth", + "ecopuck", + "hs2", + "ctd1", + "ctd2", + "isus", + "biolume", + "lopc", + "tailcone", + ], + "i2map": [ + "navigation", + "gps", + "depth", + "seabird25p", + "transmissometer", + "tailcone", + ], + } + + # Horizontal and vertical distance from origin in meters + # The origin of the x, y coordinate system is location of the + # vehicle's paroscientific depth sensor in the tailcone. + class SensorOffset(NamedTuple): + x: float + y: float + + # Original configuration of Dorado389 - Modify below with changes over time + # This code uses pandas.shift() to apply a lag to the data. Posivite lag_secs + # shifts the data forward in time to account for plumbing delays for the sensor. + # As of April 2023 only integer lag_secs are supported because of pandas.shift(). + self.sinfo = OrderedDict( + [ + ( + "navigation", + { + "data_filename": "navigation.nc", + "cal_filename": None, + "lag_secs": None, + "sensor_offset": None, + }, + ), + ( + "gps", + { + "data_filename": "gps.nc", + "cal_filename": None, + "lag_secs": None, + "sensor_offset": None, + }, + ), + ( + "depth", + { + "data_filename": "parosci.nc", + "cal_filename": None, + "lag_secs": None, + "sensor_offset": SensorOffset(-0.927, -0.076), + }, + ), + ( + "hs2", + { + "data_filename": "hydroscatlog.nc", + "cal_filename": "hs2Calibration.dat", + "lag_secs": None, + "sensor_offset": SensorOffset(0.1397, -0.2794), + }, + ), + ( + "ctd1", + { + "data_filename": "ctdDriver.nc", + "cal_filename": "ctdDriver.cfg", + "lag_secs": None, + "sensor_offset": SensorOffset(1.003, 0.0001), + }, + ), + ( + "ctd2", + { + "data_filename": "ctdDriver2.nc", + "cal_filename": "ctdDriver2.cfg", + "lag_secs": None, + "sensor_offset": SensorOffset(1.003, 0.0001), + }, + ), + ( + "seabird25p", + { + "data_filename": "seabird25p.nc", + "cal_filename": "seabird25p.cfg", + "lag_secs": None, + "sensor_offset": SensorOffset(4.04, 0.0), + }, + ), + ( + "isus", + { + "data_filename": "isuslog.nc", + "cal_filename": None, + "lag_secs": 6, + "sensor_offset": None, + }, + ), + ( + "biolume", + { + "data_filename": "biolume.nc", + "cal_filename": None, + # See Slack thread https://mbari.slack.com/archives/C04ETLY6T7V/p1682439517159249?thread_ts=1682128534.742919&cid=C04ETLY6T7V + "lag_secs": 0.5, + "sensor_offset": SensorOffset(4.04, 0.0), + # From https://bitbucket.org/messiem/matlab_libraries/src/master/ + # data_access/donnees_insitu/MBARI/AUV/charge_Dorado.m + # % UBAT flow conversion + # if time>=datenum(2010,6,29), flow_conversion=4.49E-04; + # else, flow_conversion=4.5E-04; % calibration on 2/2/2009 but unknown before # noqa: E501 + # end + # flow_conversion=flow_conversion*1E3; % using flow in mL/s + # flow1Hz=rpm*flow_conversion; + "flow_conversion": 4.5e-4 * 1e3, # conversion to mL/s + }, + ), + ( + "lopc", + { + "data_filename": "lopc.nc", + "cal_filename": None, + "lag_secs": None, + "sensor_offset": None, + }, + ), + ( + "ecopuck", + { + "data_filename": "FLBBCD2K.nc", + "cal_filename": "FLBBCD2K-3695.dev", + "lag_secs": None, + "sensor_offset": None, + }, + ), + ( + "tailcone", + { + "data_filename": "tailCone.nc", + "cal_filename": None, + "lag_secs": None, + "sensor_offset": None, + }, + ), + ], + ) + + # Changes over time + if self.args.auv_name.lower().startswith("dorado"): + self.sinfo["depth"]["sensor_offset"] = None + if start_datetime >= datetime(2007, 4, 30, tzinfo=UTC): + # First missions with 10 Gulpers: 2007.120.00 & 2007.120.01 + for instr in ("ctd1", "ctd2", "hs2", "lopc", "ecopuck", "isus"): + # TODO: Verify the length of the 10-Gulper midsection + self.sinfo[instr]["sensor_offset"] = SensorOffset(4.5, 0.0) + if start_datetime >= datetime(2014, 9, 21, tzinfo=UTC): + # First mission with 20 Gulpers: 2014.265.03 + for instr in ("ctd1", "ctd2", "hs2", "lopc", "ecopuck", "isus"): + self.sinfo[instr]["sensor_offset"] = SensorOffset(4.5, 0.0) + if start_datetime >= datetime(2010, 6, 29, tzinfo=UTC): + self.sinfo["biolume"]["flow_conversion"] = 4.49e-4 * 1e3 + + def _range_qc_combined_nc( + self, + instrument: str, + variables: list[str], + ranges: dict, + set_to_nan: bool = False, # noqa: FBT001, FBT002 + ) -> None: + """For variables in combined_nc remove values that fall outside + of specified min, max range. Meant to be called by instrument so + that the union of bad values from a set of variables can be removed. + Use set_to_nan=True to set values outside of range to NaN instead of + removing all variables from the instrument. Setting set_to_nan=True + makes sense for record (data) variables - such as ctd1_salinity, + but not for coordinate variables.""" + out_of_range_indices = np.array([], dtype=int) + vars_checked = [] + for var in variables: + if var in self.combined_nc.variables: + if var in ranges: + out_of_range = np.where( + (self.combined_nc[var] < ranges[var].min) + | (self.combined_nc[var] > ranges[var].max), + )[0] + self.logger.debug( + "%s: %d out of range values = %s", + var, + len(self.combined_nc[var][out_of_range].to_numpy()), + self.combined_nc[var][out_of_range].to_numpy(), + ) + out_of_range_indices = np.union1d( + out_of_range_indices, + out_of_range, + ) + if len(out_of_range_indices) > 500: # noqa: PLR2004 + self.logger.warning( + "More than 500 (%d) %s values found outside of range. " + "This may indicate a problem with the %s data.", + len(self.combined_nc[var][out_of_range_indices].to_numpy()), + var, + instrument, + ) + if set_to_nan and var not in self.combined_nc.coords: + self.logger.info( + "Setting %s %s values to NaN", len(out_of_range_indices), var + ) + self.combined_nc[var][out_of_range_indices] = np.nan + vars_checked.append(var) + else: + self.logger.debug("No Ranges set for %s", var) + else: + self.logger.warning("%s not in self.combined_nc", var) + inst_vars = [ + str(var) for var in self.combined_nc.variables if str(var).startswith(f"{instrument}_") + ] + self.logger.info( + "Checked for data outside of these variables and ranges: %s", + [(v, ranges[v]) for v in vars_checked], + ) + if not set_to_nan: + for var in inst_vars: + self.logger.info( + "%s: deleting %d values found outside of above ranges: %s", + var, + len(self.combined_nc[var][out_of_range_indices].to_numpy()), + self.combined_nc[var][out_of_range_indices].to_numpy(), + ) + coord = next(iter(self.combined_nc[var].coords)) + self.combined_nc[f"{var}_qced"] = ( + self.combined_nc[var] + .drop_isel({coord: out_of_range_indices}) + .rename({f"{instrument}_time": f"{instrument}_time_qced"}) + ) + self.combined_nc = self.combined_nc.drop_vars(inst_vars) + for var in inst_vars: + self.logger.debug("Renaming %s_qced to %s", var, var) + self.combined_nc[var] = self.combined_nc[f"{var}_qced"].rename( + {f"{coord}_qced": coord}, + ) + qced_vars = [f"{var}_qced" for var in inst_vars] + self.combined_nc = self.combined_nc.drop_vars(qced_vars) + self.logger.info("Done range checking %s", instrument) + + def _read_data(self, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 + """Read in all the instrument data into member variables named by "sensor" + Access xarray.Dataset like: self.ctd.data, self.navigation.data, ... + Access calibration coefficients like: self.ctd.cals.t_f0, or as a + dictionary for hs2 data. Collect summary metadata fields that should + describe the source of the data if copied from M3. + """ + self.summary_fields = set() + for sensor, info in self.sinfo.items(): + sensor_info = SensorInfo() + orig_netcdf_filename = Path(netcdfs_dir, info["data_filename"]) + self.logger.debug( + "Reading data from %s into self.%s.orig_data", + orig_netcdf_filename, + sensor, + ) + try: + sensor_info.orig_data = xr.open_dataset( + orig_netcdf_filename, decode_timedelta=False + ) + except (FileNotFoundError, ValueError) as e: + self.logger.debug( + "%-10s: Cannot open file %s: %s", + sensor, + orig_netcdf_filename, + e, + ) + except OverflowError: + self.logger.exception( + "%-10s: Cannot open file %s", + sensor, + orig_netcdf_filename, + ) + self.logger.info( + "Perhaps _remove_bad_values() needs to be called for it in logs2netcdfs.py", + ) + if info["cal_filename"]: + cal_filename = Path(logs_dir, info["cal_filename"]) + self.logger.debug( + "Reading calibrations from %s into self.%s.cals", + orig_netcdf_filename, + sensor, + ) + if str(cal_filename).endswith(".cfg"): + try: + sensor_info.cals = self._read_cfg(cal_filename) + except FileNotFoundError as e: + self.logger.debug("%s", e) + elif str(cal_filename).endswith(".dev"): + try: + sensor_info.cals = self._read_eco_dev(cal_filename) + except FileNotFoundError as e: + self.logger.debug("%s", e) + + setattr(self, sensor, sensor_info) + if hasattr(sensor_info, "orig_data"): + try: + self.summary_fields.add( + getattr(self, sensor).orig_data.attrs["summary"], + ) + except KeyError: + self.logger.warning("%s: No summary field", orig_netcdf_filename) + + # TODO: Warn if no data found and if logs2netcdfs.py should be run + + def _read_cfg(self, cfg_filename): + """Emulate what get_auv_cal.m and processCTD.m do in the + Matlab doradosdp toolbox + """ + self.logger.debug("Opening %s", cfg_filename) + coeffs = Coeffs() + # Default for non-i2map data + coeffs.t_coefs = "A" + coeffs.c_coefs = "A" + with Path(cfg_filename).open() as fh: + for line in fh: + ##self.logger.debug(line) + if line.startswith("//"): + continue + # From get_auv_cal.m - Handle CTD calibration parameters + if line[:2] in ( + "t_", + "c_", + "ep", + "SO", + "BO", + "Vo", + "TC", + "PC", + "Sc", + "Da", + ): + coeff, value = (s.strip() for s in line.split("=")) + try: + self.logger.debug("Saving %s", line) + # Like in Seabird25p.cc use ?_coefs to determine which + # calibration scheme to use for i2map data + if coeff in {"t_coefs", "c_coefs"}: + setattr(coeffs, coeff, str(value.split(";")[0])) + else: + setattr(coeffs, coeff, float(value.split(";")[0])) + except ValueError as e: + self.logger.debug("%s", e) + return coeffs + + def _cal_date_xml_files( + self, + sensor_dir: str, + cal_date_dirs: list, + serial_number: int, + ) -> dict: + cal_date_xml_files = {} + for cal_date_dir in cal_date_dirs: + find_cmd = f'find "{Path(sensor_dir, cal_date_dir)}" -iname "*.xml"' + self.logger.debug("Executing %s", find_cmd) + import subprocess + + safe_sensor_dir = Path(sensor_dir).resolve() + safe_cal_date_dir = Path(sensor_dir, cal_date_dir).resolve() + + find_cmd = f'find "{safe_sensor_dir}" "{safe_cal_date_dir}" -iname "*.xml"' + if not safe_sensor_dir.is_dir() or not safe_cal_date_dir.is_dir(): + error_message = "Invalid directory paths provided." + raise ValueError(error_message) + if not safe_sensor_dir.is_dir() or not safe_cal_date_dir.is_dir(): + error_message = "Invalid directory paths provided." + raise ValueError(error_message) + result = subprocess.run( # noqa: S603 + shlex.split(find_cmd), # noqa: S603 + capture_output=True, + text=True, + check=True, + ) + xml_files = [x for x in result.stdout.split("\n") if x] + if len(xml_files) == 0: + self.logger.debug( + "Cannot find %s.xml in %s/%s", + serial_number, + sensor_dir, + cal_date_dir, + ) + continue + if len(xml_files) > 1: + self.logger.warning( + "Found %d xml files in %s/%s", + len(xml_files), + sensor_dir, + cal_date_dir, + ) + self.logger.info("{xml_files}") + cal_xml_filename = xml_files[0] + + # The .xml file looks like: + # + # + # 2510 + # 06-May-22 + # 1 + # + # + # 0.0000 + # 0.0000e+000 + # .... + try: + root = ET.parse(cal_xml_filename).getroot() + except ET.ParseError as e: + self.logger.warning( + "Cannot parse %s: %s", + cal_xml_filename, + e, + ) + continue + try: + cal_date = datetime.strptime( + root.find("CalibrationDate").text, + "%d-%b-%y", + ).replace(tzinfo=UTC) + except ValueError as e: + self.logger.warning( + "Cannot parse CalibrationDate, %s", + root.find("CalibrationDate").text, + ) + # "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files/SBE-43/143/2011_June/Oxygen_SBE43_0143.XML" # noqa: E501 + # has: 08-Jun-11p + if root.find("CalibrationDate").text.endswith("p"): + self.logger.info("Trying to parse CalibrationDate without 'p'") + cal_date = datetime.strptime( + root.find("CalibrationDate").text[:-1], + "%d-%b-%y", + ).replace(tzinfo=UTC) + else: + error_message = ( + f"Cannot parse CalibrationDate {root.find('CalibrationDate').text}" + ) + raise ValueError(error_message) from e + cal_date_xml_files[cal_date] = cal_xml_filename + + return OrderedDict(sorted(cal_date_xml_files.items())) + + def _read_oxy_coeffs( # noqa: C901, PLR0912, PLR0915 + self, + cfg_filename: Path, + portstbd: str = "", + ) -> tuple[Coeffs, str]: + """Based on the serial number found as a comment in the .cfg file find + the approriate calibration coefficients for the oxygen sensor within the + '/DMO/MDUC_CORE_CTD_200103/Calibration Files' shared drive folder. + portstbd is either "", "port" or "stbd". + """ + # For i2map .cfg file lines look like: + # //OxygenSerialNumber = 2510; + # //note - this is the sensor in line with the C & T sensors. Goes to voltage channel 3 + # + # //OxygenSerialNumber = 3968; + # //note - this sensor is installed on the stbd side of the vehicle in line with the + # // transmissometer. Goes to voltage channel 5 + # //note - seabird has adopted a new DO calibration with a polynomial for temp correction + # //A = -3.0812e-003 + # //B = 7.8442e-005 + # //C = -9.0601e-007 + # //E = 0.036 + # SOc = 0.4466; + # BOc = 0.0000; + # Voff = -0.5070; + # TCor = -0.0000; + # PCor = 1.3500e-04; //not given in new calibration sheet + + # Read from .cfg file to get the serial numbers of the oxygen sensors + self.logger.debug("Opening %s", cfg_filename) + coeffs = Coeffs() + + portstbd_order = { + "port": 0, + "stbd": 1, + } # Typical order of oxygen sensors in seabird25p.cfg file + with cfg_filename.open() as fh: + sensor_count = 0 + serial_numbers = [] + for line in fh: + self.logger.debug(line) + if line.startswith("//OxygenSerialNumber = "): + serial_numbers.append(int(line.split()[-1].strip(";"))) + sensor_count += 1 + if len(serial_numbers) == 0: + error_message = f"No oxygen sensor serial number found in {cfg_filename}" + raise ValueError(error_message) + if len(serial_numbers) > 2: # noqa: PLR2004 + error_message = f"More than 2 oxygen sensor serial numbers found in {cfg_filename}" + raise ValueError(error_message) + if portstbd: + serial_number = serial_numbers[portstbd_order[portstbd]] + self.logger.info( + "Looking for calibration file for O2 sensor serial number %s on %s side", + serial_number, + portstbd, + ) + elif len(serial_numbers) == 1: + self.logger.info( + "Looking for calibration file for O2 sensor serial number %s", + serial_numbers[0], + ) + serial_number = serial_numbers[0] + else: + error_message = ( + f"Multiple oxygen sensor serial numbers found in {cfg_filename} " + "with no port or stbd specified" + ) + raise ValueError(error_message) + + # Find the calibration file for the oxygen sensor + self.logger.debug( + "Finding calibration file for oxygen serial number = %s on mission %s", + serial_number, + self.args.mission, + ) + + safe_calibration_dir = Path(self.calibration_dir).resolve() + if not safe_calibration_dir.is_dir(): + error_message = f"Calibration directory '{self.calibration_dir}' does not exist" + raise LookupError(error_message) + find_cmd = f'find "{safe_calibration_dir}" -name "{serial_number}"' + self.logger.info("Executing: %s ", find_cmd) + safe_find_cmd = shlex.split(find_cmd) + sensor_dir = subprocess.run( # noqa: S603 + safe_find_cmd, # noqa: S603 + capture_output=True, + text=True, + check=True, + ).stdout.strip() + self.logger.debug("%s", sensor_dir) + + safe_sensor_dir = Path(sensor_dir).resolve() + if not safe_sensor_dir.is_dir(): + error_message = f"Sensor directory '{sensor_dir}' does not exist" + raise LookupError(error_message) + # Find only the direct child directories: https://stackoverflow.com/a/20103980 + # Unable to use subprocess.run() with find an "*" in the command, apparently + dir_find_cmd = f'find "{safe_sensor_dir}"/* -maxdepth 0 -type d' + self.logger.debug("Executing: dir_find_cmd = %s", dir_find_cmd) + cal_date_dirs = [x.split("/")[-1] for x in os.popen(dir_find_cmd).read().split("\n") if x] # noqa: S605 + self.logger.info("Found calibration date dirs: %s", " ".join(cal_date_dirs)) + cal_dates = self._cal_date_xml_files(sensor_dir, cal_date_dirs, serial_number) + mission_start = self.seabird25p.orig_data.cf["time"].to_numpy()[0] + cal_date_to_use = next(iter(cal_dates)) # Default to first calibration date + for cal_date in cal_dates: + # Find the most recent calibration date just before the mission start + self.logger.debug( + "Comparing cal_date=%s with mission_start=%s", cal_date, mission_start + ) + self.logger.info( + "File %s has CalibrationDate %s", + cal_dates[cal_date], + cal_date, + ) + if np.datetime64(cal_date.replace(tzinfo=None)) > mission_start: + self.logger.info( + "Breaking from loop as %s is after %s with mission_start=%s", + cal_dates[cal_date], + self.args.mission, + mission_start, + ) + break + cal_date_to_use = cal_date + + if np.datetime64(cal_date_to_use.replace(tzinfo=None)) < mission_start: + self.logger.info( + "File %s is just before %s with mission_start=%s", + cal_dates[cal_date_to_use], + self.args.mission, + mission_start, + ) + else: + self.logger.info( + "File %s is the first calibration file, but is after %s with mission_start=%s", + cal_dates[cal_date_to_use], + self.args.mission, + mission_start, + ) + + # Read the calibration coefficients from the .cal file which looks like: + # INSTRUMENT_TYPE=SBE43 + # SERIALNO=2510 + # OCALDATE=09-Sep-14 + # SOC= 4.533809e-001 + # VOFFSET=-5.191352e-001 + # A=-5.251956e-003 + # B= 2.762519e-004 + # C=-4.164687e-006 + # E= 3.600000e-002 + # Tau20= 1.030000e+000 + + # parse the .xml file to get the "equation 1" calibration coefficients: + # + # + # 5.0544e-001 + # -0.5124 + # -4.8460e-003 + # 2.2670e-004 + # -3.2013e-006 + # 2.5826e+000 + # 1.92634e-004 + # -4.64803e-002 + # 3.6000e-002 + # 1.5600 + #

-3.3000e-002

+ #

5.0000e+003

+ #

1.4500e+003

+ #
+ root = ET.parse(cal_dates[cal_date_to_use]).getroot() + cal_xml_serial_number = int(root.find("SerialNumber").text) + if cal_xml_serial_number != serial_number: + self.logger.warning( + "Serial number in %s = %s does not match %s", + cal_dates[cal_date_to_use], + cal_xml_serial_number, + serial_number, + ) + for elem in root.findall("CalibrationCoefficients[@equation]"): + if elem.attrib["equation"] == "1": + eq1 = elem + for child in eq1: + try: + setattr(coeffs, child.tag, float(child.text)) + except ValueError: + setattr(coeffs, child.tag, child.text) + + return coeffs, cal_dates[cal_date_to_use] + + def _read_eco_dev(self, dev_filename): + """Read calibration information from the file associated with the + ecopuck log data. The number match what are in the cal sheets in + https://bitbucket.org/messiem/auv-biolum/src/master/DATA/sensors%20%26%20calibration/FLBBCD2K_Dorado/ + + As of 13 January 2023 the contents of all the FLBBCD2K-3695.dev files are the same: + ECO FLBBCD2K-3695 + Created on: 10/29/2014 + + COLUMNS=9 + N/U=1 + N/U=2 + N/U=3 + CHL=4 0.0073 45 + N/U=5 + Lambda=6 1.633E-06 46 700 700 + N/U=7 + CDOM=8 0.0909 45 + N/U=9 + """ + # Read the calibration coefficients from the .dev file, in case they change + coeffs = Coeffs() + with dev_filename.open() as fh: + for line in fh: + if line.startswith("CHL"): + # CHL (μg/l) = Scale Factor * (Output - Dark counts) + coeffs.chl_scale_factor = float(line.split()[1]) + coeffs.chl_dark_counts = float(line.split()[2]) + elif line.startswith("Lambda"): + # From Scattering Meter Calibration Sheet - wavelength 700 nm + # "Lambda" == "bbp700" ? + # β(θc) m-1 sr-1 = Scale Factor x (Output - Dark Counts) + coeffs.bbp700_scale_factor = float(line.split()[1]) + coeffs.bbp700_dark_counts = float(line.split()[2]) + elif line.startswith("CDOM"): + # CDOM (ppb) = Scale Factor x (Output - Dark Counts) + coeffs.cdom_scale_factor = float(line.split()[1]) + coeffs.cdom_dark_counts = float(line.split()[2]) + return coeffs + + def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 + # AUV navigation data, which comes from a process on the vehicle that + # integrates data from several instruments. We use it to grab the DVL + # data to help determine vehicle position when it is below the surface. + # + # Nav.depth is used to compute pressure for salinity and oxygen computations + # Nav.latitude and Nav.longitude converted to degrees were added to + # the log file at end of 2004 + # Nav.roll, Nav.pitch, Nav.yaw, Nav.Xpos and Nav.Ypos are extracted for + # 3-D mission visualization + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.error("%s", e) # noqa: TRY400 + return + except AttributeError: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing times") + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing times at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel(time=monotonic) + + source = self.sinfo[sensor]["data_filename"] + coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" + vars_to_qc = [] + # Units of these angles are radians in the original files, we want degrees + vars_to_qc.append("navigation_roll") + self.combined_nc["navigation_roll"] = xr.DataArray( + orig_nc["mPhi"].to_numpy() * 180 / np.pi, + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_roll", + ) + self.combined_nc["navigation_roll"].attrs = { + "long_name": "Vehicle roll", + "standard_name": "platform_roll_angle", + "units": "degree", + "coordinates": coord_str, + "comment": f"mPhi from {source}", + } + + vars_to_qc.append("navigation_pitch") + self.combined_nc["navigation_pitch"] = xr.DataArray( + orig_nc["mTheta"].to_numpy() * 180 / np.pi, + coords=[orig_nc.get_index("time")], + dims={"navigation_time"}, + name="pitch", + ) + self.combined_nc["navigation_pitch"].attrs = { + "long_name": "Vehicle pitch", + "standard_name": "platform_pitch_angle", + "units": "degree", + "coordinates": coord_str, + "comment": f"mTheta from {source}", + } + + vars_to_qc.append("navigation_yaw") + self.combined_nc["navigation_yaw"] = xr.DataArray( + orig_nc["mPsi"].to_numpy() * 180 / np.pi, + coords=[orig_nc.get_index("time")], + dims={"navigation_time"}, + name="yaw", + ) + self.combined_nc["navigation_yaw"].attrs = { + "long_name": "Vehicle yaw", + "standard_name": "platform_yaw_angle", + "units": "degree", + "coordinates": coord_str, + "comment": f"mPsi from {source}", + } + + self.combined_nc["navigation_posx"] = xr.DataArray( + orig_nc["mPos_x"].to_numpy() - orig_nc["mPos_x"].to_numpy()[0], + coords=[orig_nc.get_index("time")], + dims={"navigation_time"}, + name="posx", + ) + self.combined_nc["navigation_posx"].attrs = { + "long_name": "Relative lateral easting", + "units": "m", + "coordinates": coord_str, + "comment": f"mPos_x (minus first position) from {source}", + } + + self.combined_nc["navigation_posy"] = xr.DataArray( + orig_nc["mPos_y"].to_numpy() - orig_nc["mPos_y"].to_numpy()[0], + coords=[orig_nc.get_index("time")], + dims={"navigation_time"}, + name="posy", + ) + self.combined_nc["navigation_posy"].attrs = { + "long_name": "Relative lateral northing", + "units": "m", + "coordinates": coord_str, + "comment": f"mPos_y (minus first position) from {source}", + } + + vars_to_qc.append("navigation_depth") + self.combined_nc["navigation_depth"] = xr.DataArray( + orig_nc["mDepth"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={"navigation_time"}, + name="navigation_depth", + ) + self.combined_nc["navigation_depth"].attrs = { + "long_name": "Depth from Nav", + "standard_name": "depth", + "units": "m", + "comment": f"mDepth from {source}", + } + + self.combined_nc["navigation_mWaterSpeed"] = xr.DataArray( + orig_nc["mWaterSpeed"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={"navigation_time"}, + name="navigation_mWaterSpeed", + ) + self.combined_nc["navigation_mWaterSpeed"].attrs = { + "long_name": "Current speed based upon DVL data", + "standard_name": "platform_speed_wrt_sea_water", + "units": "m/s", + "comment": f"mWaterSpeed from {source}", + } + + if "latitude" in orig_nc: + navlat_var = "latitude" + elif "latitudeNav" in orig_nc: + # Starting with 2022.243.00 the latitude variable name was changed + navlat_var = "latitudeNav" + else: + navlat_var = None # noqa: F841 + self.logger.debug( + "Likely before 2004.167.04 when latitude was added to navigation.log", + ) + + navlons = None + navlats = None + if "longitude" in orig_nc: + # starting with 2004.167.04 latitude & longitude were added to navigation.log + navlons = orig_nc["longitude"].to_numpy() + navlats = orig_nc["latitude"].to_numpy() + elif "longitudeNav" in orig_nc: + # Starting with 2022.243.00 the longitude variable name was changed + navlons = orig_nc["longitudeNav"].to_numpy() + navlats = orig_nc["latitudeNav"].to_numpy() + else: + # Up through 2004.112.02 we converted from Easting/Northing to lat/lon + # - all missions in Monterey Bay (Zone 10) + self.logger.info( + "Converting from Easting/Northing to lat/lon for mission %s", + self.args.mission, + ) + proj = pyproj.Proj(proj="utm", zone=10, ellps="WGS84", radians=False) + navlons, navlats = proj( + orig_nc["mPos_y"].to_numpy(), + orig_nc["mPos_x"].to_numpy(), + inverse=True, + ) + navlons = navlons * np.pi / 180.0 + navlats = navlats * np.pi / 180.0 + + if navlons.any() and navlats.any(): + vars_to_qc.append("navigation_latitude") + self.combined_nc["navigation_latitude"] = xr.DataArray( + navlats * 180 / np.pi, + coords=[orig_nc.get_index("time")], + dims={"navigation_time"}, + name="latitude", + ) + self.combined_nc["navigation_latitude"].attrs = { + "long_name": "latitude", + "standard_name": "latitude", + "units": "degrees_north", + "comment": f"latitude (converted from radians) from {source}", + } + vars_to_qc.append("navigation_longitude") + self.combined_nc["navigation_longitude"] = xr.DataArray( + navlons * 180 / np.pi, + coords=[orig_nc.get_index("time")], + dims={"navigation_time"}, + name="longitude", + ) + # Setting standard_name attribute here once sets it for all variables + self.combined_nc["navigation_longitude"].coords[f"{sensor}_time"].attrs = { + "standard_name": "time", + } + self.combined_nc["navigation_longitude"].attrs = { + "long_name": "longitude", + "standard_name": "longitude", + "units": "degrees_east", + "comment": f"longitude (converted from radians) from {source}", + } + else: + # Setting standard_name attribute here once sets it for all variables + self.combined_nc["navigation_depth"].coords[f"{sensor}_time"].attrs = { + "standard_name": "time", + } + + # % Remove obvious outliers that later disrupt the section plots. + # % (First seen on mission 2008.281.03) + # % In case we ever use this software for the D Allan B mapping vehicle determine + # % the good depth range from the median of the depths + # % From mission 2011.250.11 we need to first eliminate the near surface values + # % before taking the median. + # pdIndx = find(Nav.depth > 1); + # posDepths = Nav.depth(pdIndx); + pos_depths = np.where(self.combined_nc["navigation_depth"].to_numpy() > 1) + if self.args.mission in {"2013.301.02", "2009.111.00"}: + self.logger.info("Bypassing Nav QC depth check") + maxGoodDepth = 1250 + else: + if pos_depths[0].size == 0: + self.logger.warning( + "No positive depths found in %s/navigation.nc", + self.args.mission, + ) + maxGoodDepth = 1250 + else: + maxGoodDepth = 7 * np.median(pos_depths) + self.logger.debug("median of positive valued depths = %s", np.median(pos_depths)) + if maxGoodDepth < 0: + maxGoodDepth = 100 # Fudge for the 2009.272.00 mission where median was -0.1347! + if self.args.mission == "2010.153.01": + maxGoodDepth = 1250 # Fudge for 2010.153.01 where the depth was bogus, about 1.3 + + self.logger.debug("Finding depths less than '%s' and times > 0'", maxGoodDepth) + + if self.args.mission == "2010.172.01": + self.logger.info( + "Performing special QC for %s/navigation.nc", + self.args.mission, + ) + self._range_qc_combined_nc( + instrument="navigation", + variables=vars_to_qc, + ranges={ + "navigation_depth": Range(0, 1000), + "navigation_roll": Range(-180, 180), + "navigation_pitch": Range(-180, 180), + "navigation_yaw": Range(-360, 360), + "navigation_longitude": Range(-360, 360), + "navigation_latitude": Range(-90, 90), + }, + ) + + missions_to_check = { + "2004.345.00", + "2005.240.00", + "2007.134.09", + "2010.293.00", + "2011.116.00", + "2013.227.00", + "2016.348.00", + "2017.121.00", + "2017.269.01", + "2017.297.00", + "2017.347.00", + "2017.304.00", + "2011.166.00", + } + if self.args.mission in missions_to_check: + self.logger.info( + "Removing points outside of Monterey Bay for %s/navigation.nc", self.args.mission + ) + self._range_qc_combined_nc( + instrument="navigation", + variables=vars_to_qc, + ranges={ + "navigation_longitude": Range(-122.1, -121.7), + "navigation_latitude": Range(36, 37), + }, + ) + if self.args.mission == "2010.284.00": + self.logger.info( + "Removing points outside of time range for %s/navigation.nc", + self.args.mission, + ) + self._range_qc_combined_nc( + instrument="navigation", + variables=[v for v in self.combined_nc.variables if v.startswith(sensor)], + ranges={ + f"{sensor}_time": Range( + pd.Timestamp(2010, 10, 11, 20, 0, 0), + pd.Timestamp(2010, 10, 12, 3, 28, 0), + ), + }, + ) + + def _nudge_pos(self, max_sec_diff_at_end=10): # noqa: C901, PLR0912, PLR0915 + """Apply linear nudges to underwater latitudes and longitudes so that + they match the surface gps positions. + """ + self.segment_count = None + self.segment_minsum = None + + try: + lon = self.combined_nc["navigation_longitude"] + except KeyError: + error_message = "No navigation_longitude data in combined_nc" + raise EOFError(error_message) from None + lat = self.combined_nc["navigation_latitude"] + lon_fix = self.combined_nc["gps_longitude"] + lat_fix = self.combined_nc["gps_latitude"] + + self.logger.info( + f"{'seg#':5s} {'end_sec_diff':12s} {'end_lon_diff':12s} {'end_lat_diff':12s}" # noqa: G004 + f" {'len(segi)':9s} {'seg_min':>9s} {'u_drift (cm/s)':14s} {'v_drift (cm/s)':14s}" + f" {'start datetime of segment':>29}", + ) + + # Any dead reckoned points before first GPS fix - usually empty + # as GPS fix happens before dive + segi = np.where(lat.cf["T"].data < lat_fix.cf["T"].data[0])[0] + if lon[:][segi].any(): + lon_nudged_array = lon[segi] + lat_nudged_array = lat[segi] + dt_nudged = lon.get_index("navigation_time")[segi] + self.logger.debug( + "Filled _nudged arrays with %d values starting at %s " + "which were before the first GPS fix at %s", + len(segi), + lat.get_index("navigation_time")[0], + lat_fix.get_index("gps_time")[0], + ) + else: + lon_nudged_array = np.array([]) + lat_nudged_array = np.array([]) + dt_nudged = np.array([], dtype="datetime64[ns]") + if segi.any(): + seg_min = ( + lat.get_index("navigation_time")[segi][-1] + - lat.get_index("navigation_time")[segi][0] + ).total_seconds() / 60 + else: + seg_min = 0 + self.logger.info( + f"{' ':5} {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14} {'-':>29}", # noqa: E501, G004 + ) + + seg_count = 0 + seg_minsum = 0 + for i in range(len(lat_fix) - 1): + # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes + segi = np.where( + np.logical_and( + lat.cf["T"].data > lat_fix.cf["T"].data[i], + lat.cf["T"].data < lat_fix.cf["T"].data[i + 1], + ), + )[0] + if not segi.any(): + self.logger.debug( + f"No dead reckoned values found between GPS times of " # noqa: G004 + f"{lat_fix.cf['T'].data[i]} and {lat_fix.cf['T'].data[i + 1]}", + ) + continue + + end_sec_diff = float(lat_fix.cf["T"].data[i + 1] - lat.cf["T"].data[segi[-1]]) / 1.0e9 + + end_lon_diff = float(lon_fix[i + 1]) - float(lon[segi[-1]]) + end_lat_diff = float(lat_fix[i + 1]) - float(lat[segi[-1]]) + + # Compute approximate horizontal drift rate as a sanity check + try: + u_drift = ( + end_lon_diff + * float(np.cos(lat_fix[i + 1] * np.pi / 180)) + * 60 + * 185300 + / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) + ) + except ZeroDivisionError: + u_drift = 0 + try: + v_drift = ( + end_lat_diff + * 60 + * 185300 + / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) + ) + except ZeroDivisionError: + v_drift = 0 + + if abs(end_lon_diff) > 1 or abs(end_lat_diff) > 1: + # It's a problem if we have more than 1 degree difference at the end of the segment. + # This is usually because the GPS fix is bad, but sometimes it's because the + # dead reckoned position is bad. Or sometimes it's both as in dorado 2016.384.00. + # Early QC by calling _range_qc_combined_nc() can remove the bad points. + # Monterey Bay missions that have bad points can be added to the lists in + # _navigation_process() and/or _gps_process(). + self.logger.info( + f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" + f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", + ) + self.logger.error( + "End of underwater segment dead reckoned position is too different " + "from GPS fix: abs(end_lon_diff) (%s) > 1 or abs(end_lat_diff) (%s) > 1", + end_lon_diff, + end_lat_diff, + ) + self.logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s %s", + self.args.auv_name, + self.args.mission, + ) + error_message = ( + f"abs(end_lon_diff) ({end_lon_diff}) > 1 or " + f"abs(end_lat_diff) ({end_lat_diff}) > 1" + ) + raise ValueError(error_message) + if abs(end_sec_diff) > max_sec_diff_at_end: + # Happens in dorado 2016.348.00 because of a bad GPS fixes being removed + self.logger.warning( + "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", + end_sec_diff, + max_sec_diff_at_end, + ) + self.logger.info( + "Overriding end_lon_diff (%s) and end_lat_diff (%s) by setting them to 0", + end_lon_diff, + end_lat_diff, + ) + end_lon_diff = 0 + end_lat_diff = 0 + + seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 + seg_minsum += seg_min + + if len(segi) > 10: # noqa: PLR2004 + self.logger.info( + f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" + f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", + ) + + # Start with zero adjustment at begining and linearly ramp up to the diff at the end + lon_nudge = np.interp( + lon.cf["T"].data[segi].astype(np.int64), + [ + lon.cf["T"].data[segi].astype(np.int64)[0], + lon.cf["T"].data[segi].astype(np.int64)[-1], + ], + [0, end_lon_diff], + ) + lat_nudge = np.interp( + lat.cf["T"].data[segi].astype(np.int64), + [ + lat.cf["T"].data[segi].astype(np.int64)[0], + lat.cf["T"].data[segi].astype(np.int64)[-1], + ], + [0, end_lat_diff], + ) + + # Sanity checks + if ( + np.max(np.abs(lon[segi] + lon_nudge)) > 180 # noqa: PLR2004 + or np.max(np.abs(lat[segi] + lon_nudge)) > 90 # noqa: PLR2004 + ): + self.logger.warning( + "Nudged coordinate is way out of reasonable range - segment %d", + seg_count, + ) + self.logger.warning( + " max(abs(lon)) = %s", + np.max(np.abs(lon[segi] + lon_nudge)), + ) + self.logger.warning( + " max(abs(lat)) = %s", + np.max(np.abs(lat[segi] + lat_nudge)), + ) + + lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) + lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) + dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) + seg_count += 1 + + # Any dead reckoned points after first GPS fix - not possible to nudge, just copy in + segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] + seg_min = 0 + if segi.any(): + lon_nudged_array = np.append(lon_nudged_array, lon[segi]) + lat_nudged_array = np.append(lat_nudged_array, lat[segi]) + dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) + seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 + + self.logger.info( + f"{seg_count + 1:4d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 + ) + self.segment_count = seg_count + self.segment_minsum = seg_minsum + + self.logger.info("Points in final series = %d", len(dt_nudged)) + + lon_nudged = xr.DataArray( + data=lon_nudged_array, + dims=["time"], + coords={"time": dt_nudged}, + name="longitude", + ) + lat_nudged = xr.DataArray( + data=lat_nudged_array, + dims=["time"], + coords={"time": dt_nudged}, + name="latitude", + ) + if self.args.plot: + fig, axes = plt.subplots(nrows=2, figsize=(18, 6)) + axes[0].plot(lat_nudged.coords["time"].data, lat_nudged, "-") + axes[0].plot(lat.cf["T"].data, lat, "--") + axes[0].plot(lat_fix.cf["T"].data, lat_fix, "*") + axes[0].set_ylabel("Latitude") + axes[0].legend(["Nudged", "Original", "GPS Fixes"]) + axes[1].plot(lon_nudged.coords["time"].data, lon_nudged, "-") + axes[1].plot(lon.cf["T"].data, lon, "--") + axes[1].plot(lon_fix.cf["T"].data, lon_fix, "*") + axes[1].set_ylabel("Longitude") + axes[1].legend(["Nudged", "Original", "GPS Fixes"]) + title = "Corrected nav from _nudge_pos()" + fig.suptitle(title) + axes[0].grid() + axes[1].grid() + self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) + plt.show() + + gps_plot = True + if gps_plot: + try: + ax = plt.axes(projection=ccrs.PlateCarree()) + except NameError: + self.logger.warning("No gps_plot, could not import cartopy") + return lon_nudged, lat_nudged + nudged = LineString(zip(lon_nudged.to_numpy(), lat_nudged.to_numpy(), strict=False)) + original = LineString(zip(lon.to_numpy(), lat.to_numpy(), strict=False)) + ax.add_geometries( + [nudged], + crs=ccrs.PlateCarree(), + edgecolor="red", + facecolor="none", + label="Nudged", + ) + ax.add_geometries( + [original], + crs=ccrs.PlateCarree(), + edgecolor="grey", + facecolor="none", + label="Original", + ) + handle_gps = ax.scatter( + lon_fix.to_numpy(), + lat_fix.to_numpy(), + color="green", + label="GPS Fixes", + ) + bounds = nudged.buffer(0.02).bounds + extent = bounds[0], bounds[2], bounds[1], bounds[3] + ax.set_extent(extent, crs=ccrs.PlateCarree()) + ax.coastlines() + handle_nudged = patches.Rectangle((0, 0), 1, 0.1, facecolor="red") + handle_original = patches.Rectangle((0, 0), 1, 0.1, facecolor="gray") + ax.legend( + [handle_nudged, handle_original, handle_gps], + ["Nudged", "Original", "GPS Fixes"], + ) + ax.gridlines( + crs=ccrs.PlateCarree(), + draw_labels=True, + linewidth=1, + color="gray", + alpha=0.5, + ) + ax.set_title(f"{self.args.auv_name} {self.args.mission}") + self.logger.debug( + "Pausing map plot (doesn't work well in VS Code debugger)." + " Close window to continue.", + ) + plt.show() + + return lon_nudged, lat_nudged + + def _gps_process(self, sensor): + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.exception("%s", e) # noqa: TRY401 + return + except AttributeError: + if self.args.mission == "2010.151.04": + # Gulf of Mexico mission - use data from usbl.dat file(s) + usbl_file = Path( + self.args.base_path, + self.args.auv_name, + MISSIONNETCDFS, + self.args.mission, + "usbl.nc", + ) + if not usbl_file.exists(): + # Copy from archive AUVCTD/missionnetcdfs/YYYY/YYYYJJJ the usbl.nc file + from archive import AUVCTD_VOL + + year = self.args.mission.split(".")[0] + YYYYJJJ = "".join(self.args.mission.split(".")[:2]) + missionnetcdfs_dir = Path( + AUVCTD_VOL, + MISSIONNETCDFS, + year, + YYYYJJJ, + self.args.mission, + ) + shutil.copyfile( + Path(missionnetcdfs_dir, "usbl.nc"), + usbl_file, + ) + self.logger.info( + "Just for the GoMx mission 2010.151.04 use data from %s " + "that came from the missionlogs/usbl.dat file", + usbl_file, + ) + orig_nc = xr.open_dataset(usbl_file) + + # Subsample usbl so that it has similar frequency to gps data + # and convert to radians so that it matches the gps data + orig_nc = orig_nc.isel(time=slice(None, None, 10)) + orig_nc["latitude"] = orig_nc["latitude"] * np.pi / 180.0 + orig_nc["longitude"] = orig_nc["longitude"] * np.pi / 180.0 + else: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None + + lat = orig_nc["latitude"] * 180.0 / np.pi + if not lat.any(): + error_message = f"No latitude data found in {sensor}.log" + raise ValueError(error_message) + if orig_nc["longitude"][0] > 0: + lon = -1 * orig_nc["longitude"] * 180.0 / np.pi + else: + lon = orig_nc["longitude"] * 180.0 / np.pi + + gps_time_to_save = orig_nc.get_index("time") + lat_to_save = lat + lon_to_save = lon + + source = self.sinfo[sensor]["data_filename"] + vars_to_qc = [] + vars_to_qc.append("gps_latitude") + self.combined_nc["gps_latitude"] = xr.DataArray( + lat_to_save.to_numpy(), + coords=[gps_time_to_save], + dims={"gps_time"}, + name="gps_latitude", + ) + self.combined_nc["gps_latitude"].attrs = { + "long_name": "GPS Latitude", + "standard_name": "latitude", + "units": "degrees_north", + "comment": f"latitude from {source}", + } + + vars_to_qc.append("gps_longitude") + self.combined_nc["gps_longitude"] = xr.DataArray( + lon_to_save.to_numpy(), + coords=[gps_time_to_save], + dims={"gps_time"}, + name="gps_longitude", + ) + # Setting standard_name attribute here once sets it for all variables + self.combined_nc["gps_longitude"].coords[f"{sensor}_time"].attrs = { + "standard_name": "time", + } + self.combined_nc["gps_longitude"].attrs = { + "long_name": "GPS Longitude", + "standard_name": "longitude", + "units": "degrees_east", + "comment": f"longitude from {source}", + } + if self.args.mission in { + "2004.345.00", + "2005.240.00", + "2007.134.09", + "2010.293.00", + "2011.116.00", + "2013.227.00", + "2016.348.00", + "2017.121.00", + "2017.269.01", + "2017.297.00", + "2017.347.00", + "2017.304.00", + "2011.166.00", + }: + self.logger.info( + "Removing points outside of Monterey Bay for %s/gps.nc", self.args.mission + ) + self._range_qc_combined_nc( + instrument="gps", + variables=vars_to_qc, + ranges={ + "gps_latitude": Range(36, 37), + "gps_longitude": Range(-122.1, -121.7), + }, + ) + + # TODO: Put this in a separate module like match_to_gps.py or something + # With navigation dead reckoned positions available in self.combined_nc + # and the gps positions added we can now match the underwater inertial + # (dead reckoned) positions to the surface gps positions. + nudged_longitude, nudged_latitude = self._nudge_pos() + self.combined_nc["nudged_latitude"] = nudged_latitude + self.combined_nc["nudged_latitude"].attrs = { + "long_name": "Nudged Latitude", + "standard_name": "latitude", + "units": "degrees_north", + "comment": "Dead reckoned latitude nudged to GPS positions", + } + self.combined_nc["nudged_longitude"] = nudged_longitude + self.combined_nc["nudged_longitude"].attrs = { + "long_name": "Nudged Longitude", + "standard_name": "longitude", + "units": "degrees_east", + "comment": "Dead reckoned longitude nudged to GPS positions", + } + + def _depth_process(self, sensor, latitude=36, cutoff_freq=1): # noqa: PLR0915 + """Depth data (from the Parosci) is 10 Hz - Use a butterworth window + to filter recorded pressure to values that are appropriately sampled + at 1 Hz (when matched with other sensor data). cutoff_freq is in + units of Hz. + """ + try: + orig_nc = getattr(self, sensor).orig_data + except (FileNotFoundError, AttributeError) as e: + self.logger.debug("Original data not found for %s: %s", sensor, e) + return + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing times") + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing times at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel(time=monotonic) + + depths = orig_nc["depth"] + # Remove egregious outliers before filtering seen form 2008 through 2012 + # ad hoc corrections for depth after testing stoqs_all_dorado load in July 2023 + mission_depth_ranges = { + "2006.054.00": Range(-1, 150), # Soquel Canyon + "2007.120.00": Range(-0.5, 32), # Shallow N. Monterey Bay + "2007.120.01": Range(-0.5, 32), # Shallow N. Monterey Bay + "2007.123.05": Range(-0.5, 32), # Shallow N. Monterey Bay + "2008.281.03": Range(-1, 30), # Shallow (< 30 m depth ) Soquel Bight + "2009.084.02": Range(-1, 60), # Diamond - lots of bad depths + "2009.085.02": Range(-1, 60), # Monterey Bay - lots of bad depths + "2009.112.07": Range(-1, 30), # Shallow Monterey Bay + "2009.113.00": Range(-1, 30), # Shallow Monterey Bay + "2009.154.00": Range(-1, 50), # Shallow Monterey Bay + "2009.155.03": Range(-1, 50), # Shallow Monterey Bay + "2009.272.00": Range(-1, 40), # Shallow Monterey Bay + "2010.118.00": Range(-1, 260), # Monterey Canyon transect + "2010.181.01": Range(-0.5, 22), # Shallow N. Monterey Bay + "2010.181.02": Range(-0.5, 22), # Shallow N. Monterey Bay + # ESP drifter missions out at station 67-70 with Flyer doing casts and ESP + # drifting south toward Davidson Seamount - no gulpers (Frederic sent me note about survey grouping) # noqa: E501 + # Faulty parosci lead to several mission depth aborts at beginning of this set of volume surveys # noqa: E501 + "2010.258.00": Range(-1, 110), # Offshore CANON 2010 + "2010.258.01": Range(-1, 110), # Offshore CANON 2010 + "2010.258.02": Range(-1, 110), # Offshore CANON 2010 + "2010.258.03": Range(-1, 110), # Offshore CANON 2010 + "2010.258.04": Range(-1, 110), # Offshore CANON 2010 + "2010.259.01": Range(-1, 110), # Offshore CANON 2010 + "2010.259.02": Range(-1, 110), # Offshore CANON 2010 + "2011.061.00": Range(-1, 50), # Shallow Monterey Bay + "2011.171.01": Range(-1, 55), # Shallow Monterey Bay + "2011.250.01": Range(-1, 60), # Shallow Monterey Bay + "2011.263.00": Range(-1, 30), # Shallow Monterey Bay + "2011.285.01": Range(-1, 25), # Shallow Monterey Bay + "2012.258.00": Range(-1, 160), # Shallow Monterey Bay + "2012.270.04": Range(-1, 30), # Shallow Monterey Bay + } + if self.args.mission in mission_depth_ranges: + valid_depth_range = mission_depth_ranges[self.args.mission] + self.logger.info( + "Removing depths outside of valid_depth_range=%s for self.args.mission=%s", + valid_depth_range, + self.args.mission, + ) + out_of_range = np.where( + (depths < valid_depth_range.min) | (depths > valid_depth_range.max), + )[0] + self.logger.debug( + "depths: %d out of range values = %s", + len(depths[out_of_range].to_numpy()), + depths[out_of_range].to_numpy(), + ) + self.logger.info("Setting %d depths values to NaN", len(out_of_range)) + depths[out_of_range] = np.nan + depths = depths.dropna("time", how="all") + + # From initial CVS commit in 2004 the processDepth.m file computed + # pres from depth this way. I don't know what is done on the vehicle + # side where a latitude of 36 is not appropriate: GoM, SoCal, etc. + self.logger.debug("Converting depth to pressure using latitude = %s", latitude) + pres = eos80.pres(depths, latitude) + + # See https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.signal.filtfilt.html#scipy.signal.filtfilt + # and https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.signal.butter.html#scipy.signal.butter + # Sample rate should be 10 - calcuate it to be sure + sample_rate = 1.0 / np.round( + np.mean(np.diff(depths["time"])) / np.timedelta64(1, "s"), + decimals=2, + ) + if sample_rate != 10: # noqa: PLR2004 + self.logger.warning( + "Expected sample_rate to be 10 Hz, instead it's %.2f Hz", + sample_rate, + ) + + # The Wn parameter for butter() is fraction of the Nyquist frequency + Wn = cutoff_freq / (sample_rate / 2.0) + b, a = signal.butter(8, Wn) + try: + depth_filtpres_butter = signal.filtfilt(b, a, pres) + except ValueError as e: + error_message = "Likely short or empty file" + raise EOFError(error_message) from e + depth_filtdepth_butter = signal.filtfilt(b, a, depths) + + # Use 10 points in boxcar as in processDepth.m + a = 10 + b = signal.windows.boxcar(a) + depth_filtpres_boxcar = signal.filtfilt(b, a, pres) + pres_plot = True # Set to False for debugging other plots + if self.args.plot and pres_plot: + # Use Pandas to plot multiple columns of data + # to validate that the filtering works as expected + pbeg = 0 + pend = len(depths.get_index("time")) + if self.args.plot.startswith("first"): + pend = int(self.args.plot.split("first")[1]) + df_plot = pd.DataFrame(index=depths.get_index("time")[pbeg:pend]) + df_plot["pres"] = pres[pbeg:pend] + df_plot["depth_filtpres_butter"] = depth_filtpres_butter[pbeg:pend] + df_plot["depth_filtpres_boxcar"] = depth_filtpres_boxcar[pbeg:pend] + title = ( + f"First {pend} points from" + f" {self.args.mission}/{self.sinfo[sensor]['data_filename']}" + ) + ax = df_plot.plot(title=title, figsize=(18, 6)) + ax.grid("on") + self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) + plt.show() + + depth_filtdepth = xr.DataArray( + depth_filtdepth_butter, + coords=[depths.get_index("time")], + dims={"depth_time"}, + name="depth_filtdepth", + ) + depth_filtdepth.attrs = { + "long_name": "Filtered Depth", + "standard_name": "depth", + "units": "m", + "comment": ( + f"Original {sample_rate:.3f} Hz depth data filtered using" + f" Butterworth window with cutoff frequency of {cutoff_freq} Hz" + ), + } + + depth_filtpres = xr.DataArray( + depth_filtpres_butter, + coords=[depths.get_index("time")], + dims={"depth_time"}, + name="depth_filtpres", + ) + depth_filtpres.attrs = { + "long_name": "Filtered Pressure", + "standard_name": "sea_water_pressure", + "units": "dbar", + "comment": ( + f"Original {sample_rate:.3f} Hz pressure data filtered using" + f" Butterworth window with cutoff frequency of {cutoff_freq} Hz" + ), + } + + self.combined_nc["depth_filtdepth"] = depth_filtdepth + self.combined_nc["depth_filtpres"] = depth_filtpres + + def _hs2_process(self, sensor, logs_dir): # noqa: C901, PLR0912, PLR0915 + try: + orig_nc = getattr(self, sensor).orig_data + except (FileNotFoundError, AttributeError) as e: + self.logger.debug("Original data not found for %s: %s", sensor, e) + return + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing times") + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing times at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel(time=monotonic) + + try: + cal_fn = Path(logs_dir, self.sinfo["hs2"]["cal_filename"]) + cals = hs2_read_cal_file(cal_fn) + except FileNotFoundError as e: + self.logger.warning("Cannot process HS2 data: %s", e) + return + + hs2 = hs2_calc_bb(orig_nc, cals) + + source = self.sinfo[sensor]["data_filename"] + coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" + + # Blue backscatter + if hasattr(hs2, "bbp420"): + blue_bs = xr.DataArray( + hs2.bbp420.to_numpy(), + coords=[hs2.bbp420.get_index("time")], + dims={"hs2_time"}, + name="hs2_bbp420", + ) + blue_bs.attrs = { + "long_name": "Particulate backscattering coefficient at 420 nm", + "coordinates": coord_str, + "units": "m-1", + "comment": (f"Computed by hs2_calc_bb() from data in {source}"), + } + if hasattr(hs2, "bbp470"): + blue_bs = xr.DataArray( + hs2.bbp470.to_numpy(), + coords=[hs2.bbp470.get_index("time")], + dims={"hs2_time"}, + name="hs2_bbp470", + ) + blue_bs.attrs = { + "long_name": "Particulate backscattering coefficient at 470 nm", + "coordinates": coord_str, + "units": "m-1", + "comment": (f"Computed by hs2_calc_bb() from data in {source}"), + } + + # Red backscatter + if hasattr(hs2, "bbp676"): + red_bs = xr.DataArray( + hs2.bbp676.to_numpy(), + coords=[hs2.bbp676.get_index("time")], + dims={"hs2_time"}, + name="hs2_bbp676", + ) + red_bs.attrs = { + "long_name": "Particulate backscattering coefficient at 676 nm", + "coordinates": coord_str, + "units": "m-1", + "comment": (f"Computed by hs2_calc_bb() from data in {source}"), + } + if hasattr(hs2, "bbp700"): + red_bs = xr.DataArray( + hs2.bbp700.to_numpy(), + coords=[hs2.bbp700.get_index("time")], + dims={"hs2_time"}, + name="hs2_bbp700", + ) + red_bs.attrs = { + "long_name": "Particulate backscattering coefficient at 700 nm", + "coordinates": coord_str, + "units": "m-1", + "comment": (f"Computed by hs2_calc_bb() from data in {source}"), + } + + # Fluorescence + if hasattr(hs2, "fl676"): + fl676 = xr.DataArray( + hs2.fl676.to_numpy(), + coords=[hs2.fl676.get_index("time")], + dims={"hs2_time"}, + name="hs2_fl676", + ) + fl676.attrs = { + "long_name": "Fluorescence at 676 nm", + "coordinates": coord_str, + "comment": (f"Computed by hs2_calc_bb() from data in {source}"), + } + fl = fl676 + if hasattr(hs2, "fl700"): + fl700 = xr.DataArray( + hs2.fl700.to_numpy(), + coords=[hs2.fl700.get_index("time")], + dims={"hs2_time"}, + name="hs2_fl700", + ) + fl700.attrs = { + "long_name": "Fluorescence at 700 nm", + "coordinates": coord_str, + "comment": (f"Computed by hs2_calc_bb() from data in {source}"), + } + fl = fl700 + + # Zeroth level quality control - same as in legacy Matlab + mblue = np.ma.masked_invalid(blue_bs) + mblue = np.ma.masked_greater(mblue, 0.1) + mred = np.ma.masked_invalid(red_bs) + mred = np.ma.masked_greater(mred, 0.1) + mfl = np.ma.masked_invalid(fl) + mfl = np.ma.masked_greater(mfl, 0.02) + + bad_hs2 = [ + f"{b}, {r}, {f}" + for b, r, f in zip( + blue_bs.to_numpy()[:][mblue.mask], + red_bs.to_numpy()[:][mred.mask], + fl.to_numpy()[:][mfl.mask], + strict=False, + ) + ] + + if bad_hs2: + self.logger.info( + "Number of bad %s points: %d of %d", + sensor, + len(blue_bs.to_numpy()[:][mblue.mask]), + len(blue_bs), + ) + self.logger.debug( + "Removing bad %s points (indices, (blue, red, fl)): %s, %s", + sensor, + np.where(mred.mask)[0], + bad_hs2, + ) + blue_bs = blue_bs[:][~mblue.mask] + red_bs = red_bs[:][~mfl.mask] + + red_blue_plot = True # Set to False for debugging other plots + if self.args.plot and red_blue_plot: + # Use Pandas to more easiily plot multiple columns of data + pbeg = 0 + pend = len(blue_bs.get_index("hs2_time")) + if self.args.plot.startswith("first"): + pend = int(self.args.plot.split("first")[1]) + df_plot = pd.DataFrame(index=blue_bs.get_index("hs2_time")[pbeg:pend]) + df_plot["blue_bs"] = blue_bs[pbeg:pend] + df_plot["red_bs"] = red_bs[pbeg:pend] + ## df_plot["fl"] = fl[pbeg:pend] + title = ( + f"First {pend} points from" + f" {self.args.mission}/{self.sinfo[sensor]['data_filename']}" + ) + ax = df_plot.plot(title=title, figsize=(18, 6), ylim=(-0.003, 0.004)) + ax.grid("on") + self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) + plt.show() + + # Save blue, red, & fl to combined_nc, also + if hasattr(hs2, "bbp420"): + self.combined_nc["hs2_bbp420"] = blue_bs + if hasattr(hs2, "bbp470"): + self.combined_nc["hs2_bbp470"] = blue_bs + if hasattr(hs2, "bbp676"): + self.combined_nc["hs2_bbp676"] = red_bs + if hasattr(hs2, "bbp700"): + self.combined_nc["hs2_bbp700"] = red_bs + if hasattr(hs2, "fl676"): + self.combined_nc["hs2_fl676"] = fl + if hasattr(hs2, "fl700"): + self.combined_nc["hs2_fl700"] = fl + + # For missions before 2009.055.05 hs2 will have attributes like bbp470, bbp676, and fl676 + # Hobilabs modified the instrument in 2009 to now give: bbp420, bbp700, and fl700, + # apparently giving a better measurement of chlorophyll. + # + # Detect the difference in this code and keep the member names descriptive in the survey + # data so the the end user knows the difference. + + # Align Geometry, correct for pitch + self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( + sensor, + orig_nc, + ) + out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + self.combined_nc[f"{sensor}_depth"].attrs = { + "long_name": "Depth", + "units": "m", + "comment": ( + f"Variable depth_filtdepth from {out_fn} linearly interpolated" + f" to {sensor}_time and corrected for pitch using" + f" {self.sinfo[sensor]['sensor_offset']}" + ), + } + + # Coordinates latitude & longitude are interpolated to the sensor time + # in the align.py code. Here we add the sensor depths as this is where + # the sensor offset is applied with _geometric_depth_correction(). + + def _calibrated_oxygen( # noqa: PLR0913 + self, + logs_dir, + sensor, + cf, + orig_nc, + var_name, + temperature, + salinity, + portstbd="", + ) -> tuple[xr.DataArray, xr.DataArray]: + """Calibrate oxygen data, returning DataArrays.""" + + if sensor == "seabird25p": + cf, cal_file = self._read_oxy_coeffs( + Path(logs_dir, self.sinfo[sensor]["cal_filename"]), + portstbd, + ) + ( + oxy_mll, + oxy_umolkg, + ) = _calibrated_O2_from_volts_SBE43( + self.combined_nc, + cf, + orig_nc, + var_name, + temperature, + salinity, + ) + mll_comment = ( + f"Derived from {var_name} from {sensor}.nc and eq 1 calibration coefficients " + f"{vars(cf)} from {cal_file = }" + ) + umolkg_comment = ( + f"Computed from oxygen_mll_{portstbd} with " + "'np.multiply(o2_mll * 1.4276, (1.0e6 / (dens * 32)))'" + ) + self.logger.info("%s: parsed from %s file: %s", var_name, cal_file, vars(cf)) + else: + ( + oxy_mll, + oxy_umolkg, + ) = _calibrated_O2_from_volts( + self.combined_nc, + cf, + orig_nc, + var_name, + temperature, + salinity, + ) + mll_comment = ( + f"Derived from {var_name} from {sensor}.nc using calibration " + f"coefficients {vars(cf)}" + ) + umolkg_comment = ( + "Computed from oxygen_mll with " + "'np.multiply(o2_mll * 1.4276, (1.0e6 / (dens * 32)))'" + ) + oxygen_mll = xr.DataArray( + oxy_mll, + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="oxygen_mll" + portstbd, + ) + oxygen_mll.attrs = { + "long_name": "Dissolved Oxygen", + "units": "ml/l", + "comment": mll_comment, + } + + oxygen_umolkg = xr.DataArray( + oxy_umolkg, + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="oxygen_umolkg" + portstbd, + ) + oxygen_umolkg.attrs = { + "long_name": "Dissolved Oxygen", + "units": "umol/kg", + "comment": umolkg_comment, + } + return oxygen_mll, oxygen_umolkg + + def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 + # Don't be put off by the length of this method. + # It's lengthy because of all the possible netCDF variables and + # attribute metadata that need to be added to the combined_nc. + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.exception("%s", e) # noqa: TRY401 + return + except AttributeError: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing times") + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing times at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel(time=monotonic) + + # Need to do this zeroth-level QC to calibrate temperature + orig_nc["temp_frequency"][orig_nc["temp_frequency"] == 0.0] = np.nan + source = self.sinfo[sensor]["data_filename"] + + # === Temperature and salinity variables === + # Seabird specific calibrations + vars_to_qc = [] + self.logger.debug("Calling _calibrated_temp_from_frequency()") + temperature = xr.DataArray( + _calibrated_temp_from_frequency(cf, orig_nc), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="temperature", + ) + temperature.attrs = { + "long_name": "Temperature", + "standard_name": "sea_water_temperature", + "units": "degree_Celsius", + "comment": ( + f"Derived from temp_frequency from {source} via calibration parms: {cf.__dict__}" + ), + } + self.combined_nc[f"{sensor}_temperature"] = temperature + + self.logger.debug("Calling _calibrated_sal_from_cond_frequency()") + cal_conductivity, cal_salinity = _calibrated_sal_from_cond_frequency( + self.args, + self.combined_nc, + self.logger, + cf, + orig_nc, + temperature, + ) + conductivity = xr.DataArray( + cal_conductivity, + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="conductivity", + ) + conductivity.attrs = { + "long_name": "Conductivity", + "standard_name": "sea_water_conductivity", + "units": "Siemens/meter", + "comment": ( + f"Derived from cond_frequency from {source} via calibration parms: {cf.__dict__}" + ), + } + self.combined_nc[f"{sensor}_conductivity"] = conductivity + vars_to_qc.append(f"{sensor}_salinity") + salinity = xr.DataArray( + cal_salinity, + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="salinity", + ) + salinity.attrs = { + "long_name": "Salinity", + "standard_name": "sea_water_salinity", + "units": "", + "comment": ( + f"Derived from cond_frequency from {source} via calibration parms: {cf.__dict__}" + ), + } + self.combined_nc[f"{sensor}_salinity"] = salinity + + # Variables computed onboard the vehicle that are recomputed here + self.logger.debug("Collecting temperature_onboard") + temperature_onboard = xr.DataArray( + orig_nc["temperature"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="temperature_onboard", + ) + # Onboard software sets bad values to absolute zero - replace with NaN + temperature_onboard[temperature_onboard <= -273] = np.nan # noqa: PLR2004 + temperature_onboard.attrs = { + "long_name": "Temperature computed onboard the vehicle", + "units": "degree_Celsius", + "comment": ( + "Temperature computed onboard the vehicle from" + " calibration parameters installed on the vehicle" + " at the time of deployment." + ), + } + self.combined_nc[f"{sensor}_temperature_onboard"] = temperature_onboard + + self.logger.debug("Collecting conductivity_onboard") + conductivity_onboard = xr.DataArray( + orig_nc["conductivity"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="conductivity_onboard", + ) + conductivity_onboard.attrs = { + "long_name": "Conductivity computed onboard the vehicle", + "units": "Siemens/meter", + "comment": ( + "Temperature computed onboard the vehicle from" + " calibration parameters installed on the vehicle" + " at the time of deployment." + ), + } + self.combined_nc[f"{sensor}_conductivity_onboard"] = conductivity_onboard + + if "salinity" in orig_nc: + self.logger.debug("Collecting salinity_onboard") + salinity_onboard = xr.DataArray( + orig_nc["salinity"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="salinity_onboard", + ) + salinity_onboard.attrs = { + "long_name": "Salinity computed onboard the vehicle", + "units": "", + "comment": ( + "Salinity computed onboard the vehicle from" + " calibration parameters installed on the vehicle" + " at the time of deployment." + ), + } + self.combined_nc[f"{sensor}_salinity_onboard"] = salinity_onboard + + # === Oxygen variables === + # original values in units of volts + self.logger.debug("Collecting dissolvedO2") + try: + dissolvedO2 = xr.DataArray( + orig_nc["dissolvedO2"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="dissolvedO2", + ) + dissolvedO2.attrs = { + "long_name": "Dissolved Oxygen sensor", + "units": "Volts", + "comment": ("Analog Voltage Channel 6 - to be converted to umol/kg"), + } + self.combined_nc[f"{sensor}_dissolvedO2"] = dissolvedO2 + ( + self.combined_nc[f"{sensor}_oxygen_mll"], + self.combined_nc[f"{sensor}_oxygen_umolkg"], + ) = self._calibrated_oxygen( + logs_dir, + sensor, + cf, + orig_nc, + "dissolvedO2", + temperature, + salinity, + "", + ) + except KeyError: + self.logger.debug("No dissolvedO2 data in %s", self.args.mission) + except ValueError as e: + cfg_file = Path( + MISSIONLOGS, + "".join(self.args.mission.split(".")[:2]), + self.args.mission, + self.sinfo["ctd"]["cal_filename"], + ) + self.logger.exception("Likely missing a calibration coefficient in %s", cfg_file) + self.logger.error(e) # noqa: TRY400 + self.logger.debug("Collecting dissolvedO2_port") + try: + dissolvedO2_port = xr.DataArray( + orig_nc["dissolvedO2_port"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="dissolvedO2_port", + ) + dissolvedO2_port.attrs = { + "long_name": "Dissolved Oxygen port side sensor", + "units": "Volts", + "comment": ("Analog Voltage Channel 3 - to be converted to umol/kg"), + } + self.combined_nc[f"{sensor}_dissolvedO2_port"] = dissolvedO2_port + ( + self.combined_nc[f"{sensor}_oxygen_mll_port"], + self.combined_nc[f"{sensor}_oxygen_umolkg_port"], + ) = self._calibrated_oxygen( + logs_dir, + sensor, + cf, + orig_nc, + "dissolvedO2_port", + temperature, + salinity, + "port", + ) + except KeyError: + self.logger.debug("No dissolvedO2_port data in %s", self.args.mission) + self.logger.debug("Collecting dissolvedO2_port") + try: + dissolvedO2_stbd = xr.DataArray( + orig_nc["dissolvedO2_stbd"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="dissolvedO2_stbd", + ) + dissolvedO2_stbd.attrs = { + "long_name": "Dissolved Oxygen stbd side sensor", + "units": "Volts", + "comment": ("Analog Voltage Channel 5 - to be converted to umol/kg"), + } + self.combined_nc[f"{sensor}_dissolvedO2_stbd"] = dissolvedO2_stbd + ( + self.combined_nc[f"{sensor}_oxygen_mll_stbd"], + self.combined_nc[f"{sensor}_oxygen_umolkg_stbd"], + ) = self._calibrated_oxygen( + logs_dir, + sensor, + cf, + orig_nc, + "dissolvedO2_stbd", + temperature, + salinity, + "stbd", + ) + except KeyError: + self.logger.debug("No dissolvedO2_port data in %s", self.args.mission) + + # === flow variables === + # A lot of 0.0 values in Dorado missions until about 2020.282.01 + self.logger.debug("Collecting flow1") + try: + flow1 = xr.DataArray( + orig_nc["flow1"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="flow1", + ) + flow1.attrs = { + "long_name": "Flow sensor on ctd1", + "units": "Volts", + "comment": f"flow1 from {source}", + } + self.combined_nc[f"{sensor}_flow1"] = flow1 + except KeyError: + self.logger.debug("No flow1 data in %s", self.args.mission) + self.logger.debug("Collecting flow2") + try: + flow2 = xr.DataArray( + orig_nc["flow2"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="flow2", + ) + flow2.attrs = { + "long_name": "Flow sensor on ctd1", + "units": "Volts", + "comment": f"flow2 from {source}", + } + self.combined_nc[f"{sensor}_flow2"] = flow2 + except KeyError: + self.logger.debug("No flow2 data in %s", self.args.mission) + + # === beam_transmittance variable from seabird25p on i2map vehicle === + try: + beam_transmittance, _ = _beam_transmittance_from_volts( + self.combined_nc, + orig_nc, + ) + beam_transmittance = xr.DataArray( + beam_transmittance * 100.0, + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="beam_transmittance", + ) + beam_transmittance.attrs = { + "long_name": "Beam Transmittance", + "units": "%", + "comment": ( + f"Calibrated Beam Transmittance from {source}'s transmissometer variable" + ), + } + self.combined_nc[f"{sensor}_beam_transmittance"] = beam_transmittance + + except KeyError: + self.logger.debug( + "No transmissometer data in %s/%s.nc", + self.args.mission, + sensor, + ) + + self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( + sensor, + orig_nc, + ) + out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + self.combined_nc[f"{sensor}_depth"].attrs = { + "long_name": "Depth", + "units": "m", + "comment": ( + f"Variable depth_filtdepth from {out_fn} linearly interpolated" + f" to {sensor}_time and corrected for pitch using" + f" {self.sinfo[sensor]['sensor_offset']}" + ), + } + + # === PAR variable from ctd2 on dorado vehicle === + try: + par = xr.DataArray( + orig_nc["par"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name="par", + ) + par.attrs = { + "long_name": "Photosynthetically Available Radiation", + "units": "Volts", + "comment": f"PAR from {source}'s par variable", + } + self.combined_nc[f"{sensor}_par"] = par + + except KeyError: + self.logger.debug("No par data in %s/%s.nc", self.args.mission, sensor) + + self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( + sensor, + orig_nc, + ) + out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + self.combined_nc[f"{sensor}_depth"].attrs = { + "long_name": "Depth", + "units": "m", + "comment": ( + f"Variable depth_filtdepth from {out_fn} linearly interpolated" + f" to {sensor}_time and corrected for pitch using" + f" {self.sinfo[sensor]['sensor_offset']}" + ), + } + + # === ad hoc Range checking === + self.logger.info( + "Performing range checking of %s in %s/%s.nc", vars_to_qc, self.args.mission, sensor + ) + self._range_qc_combined_nc( + instrument=sensor, + variables=vars_to_qc, + ranges={f"{sensor}_salinity": Range(30, 40)}, + set_to_nan=True, + ) + if self.args.mission == "2010.284.00": + self.logger.info( + "Removing points outside of time range for %s/%s.nc", self.args.mission, sensor + ) + self._range_qc_combined_nc( + instrument=sensor, + variables=[v for v in self.combined_nc.variables if v.startswith(sensor)], + ranges={ + f"{sensor}_time": Range( + pd.Timestamp(2010, 10, 11, 20, 0, 0), + pd.Timestamp(2010, 10, 12, 3, 28, 0), + ), + }, + ) + + def _tailcone_process(self, sensor): + # As requested by Rob Sherlock capture propRpm for comparison with + # mWaterSpeed from navigation.log + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.error("%s", e) # noqa: TRY400 + return + except AttributeError: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing times") + try: + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + except IndexError: + error_message = "No data in tailcone.nc - likely empty tailcone.log file" + raise ValueError(error_message) from None + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing times at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel(time=monotonic) + + source = self.sinfo[sensor]["data_filename"] + coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" + self.combined_nc["tailcone_propRpm"] = xr.DataArray( + orig_nc["propRpm"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_propRpm", + ) + self.combined_nc["tailcone_propRpm"].attrs = { + "long_name": "Vehicle propeller speed", + # Don't be confused by its name - propeller speed is logged in radians/sec. + "units": "rad/s", + "coordinates": coord_str, + "comment": f"propRpm from {source} (convert to RPM by multiplying by 9.549297)", + } + + def _ecopuck_process(self, sensor, cf): + # ecpouck's first mission 2020.245.00 - email dialog on 5 Dec 2022 discussing + # using it for developing an HS2 transfer function and comparison with LRAUV data + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.error("%s", e) # noqa: TRY400 + return + except AttributeError: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing times") + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing times at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel(time=monotonic) + + source = self.sinfo[sensor]["data_filename"] + coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" + beta_700 = cf.bbp700_scale_factor * (orig_nc["BB_Sig"].to_numpy() - cf.bbp700_dark_counts) + _, bbp = compute_backscatter(700, AVG_SALINITY, beta_700) # 33.6 + + self.combined_nc["ecopuck_bbp700"] = xr.DataArray( + bbp, + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_bbp700", + ) + self.combined_nc["ecopuck_bbp700"].attrs = { + "long_name": "Particulate backscattering coefficient at 700 nm", + "units": "m-1", + "coordinates": coord_str, + "comment": ( + f"BB_Sig from {source} converted to beta_700 using scale factor " + f"{cf.bbp700_scale_factor} and dark counts {cf.bbp700_dark_counts}, " + "then converted to bbp700 by the compute_backscatter() function." + ), + } + + self.combined_nc["ecopuck_cdom"] = xr.DataArray( + cf.cdom_scale_factor * (orig_nc["CDOM_Sig"].to_numpy() - cf.cdom_dark_counts), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_cdom", + ) + self.combined_nc["ecopuck_cdom"].attrs = { + "long_name": "Colored Dissolved Organic Matter", + "units": "ppb", + "coordinates": coord_str, + "comment": ( + f"CDOM_Sig from {source} converted to cdom using scale factor " + f"{cf.cdom_scale_factor} and dark counts {cf.cdom_dark_counts}" + ), + } + + self.combined_nc["ecopuck_chl"] = xr.DataArray( + cf.chl_scale_factor * (orig_nc["Chl_Sig"].to_numpy() - cf.chl_dark_counts), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_chl", + ) + + # From: FLBBCD2K-3695_(CHL)CharSheet.pdf + # The relationship between fluorescence and chlorophyll-a concentrations in-situ is + # highly variable. The scale factor listed on this document was determined using a + # mono-culture of phytoplankton (Thalassiosira weissflogii). The population was + # assumed to be reasonably healthy and the concentration was determined by using the + # absorption method. To accurately determine chlorophyll concentration using a + # fluorometer, you must perform secondary measurements on the populations of + # interest. This is typically done using extraction-based measurement techniques on + # discrete samples. For additional information on determining chlorophyll + # concentration see "Standard Methods for the Examination of Water and Wastewater" + # part 10200 H, published jointly by the American Public Health Association, + # American Water Works Association, and the Water Environment ,)deration. + self.combined_nc["ecopuck_chl"].attrs = { + "long_name": "Chlorophyll", + "units": "ug/l", + "coordinates": coord_str, + "comment": ( + f"Chl_Sig from {source} converted to chl using scale factor " + f"{cf.chl_scale_factor} and dark counts {cf.chl_dark_counts}" + ), + } + + def _apply_plumbing_lag( + self, + sensor: str, + time_index: pd.DatetimeIndex, + time_name: str, + ) -> tuple[xr.DataArray, str]: + """ + Apply plumbing lag to a time index in the combined netCDF file. + """ + # Convert lag_secs to milliseconds as np.timedelta64 neeeds an integer + lagged_time = time_index - np.timedelta64( + int(self.sinfo[sensor]["lag_secs"] * 1000), + "ms", + ) + # Need to update the sensor's time coordinate in the combined netCDF file + # so that DataArrays created with lagged_time fit onto the coordinate + self.combined_nc.coords[f"{sensor}_{time_name}"] = xr.DataArray( + lagged_time, + coords=[lagged_time], + dims={f"{sensor}_{time_name}"}, + name=f"{sensor}_{time_name}", + ) + lag_info = f"with plumbing lag correction of {self.sinfo[sensor]['lag_secs']} seconds" + return lagged_time, lag_info + + def _biolume_process(self, sensor): + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.error("%s", e) # noqa: TRY400 + return + except AttributeError: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing time") + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing time at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel({TIME: monotonic}) + + self.logger.info("Checking for non-monotonic increasing %s", TIME60HZ) + monotonic = monotonic_increasing_time_indices(orig_nc.get_index(TIME60HZ)) + if (~monotonic).any(): + self.logger.info( + "Removing non-monotonic increasing %s at indices: %s", + TIME60HZ, + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel({TIME60HZ: monotonic}) + + self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( + sensor, + orig_nc, + ) + + source = self.sinfo[sensor]["data_filename"] + self.combined_nc["biolume_flow"] = xr.DataArray( + orig_nc["flow"].to_numpy() * self.sinfo["biolume"]["flow_conversion"], + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_flow", + ) + self.combined_nc["biolume_flow"].attrs = { + "long_name": "Bioluminesence pump flow rate", + "units": "mL/s", + "coordinates": f"{sensor}_time {sensor}_depth", + "comment": f"flow from {source}", + } + + lagged_time, lag_info = self._apply_plumbing_lag( + sensor, + orig_nc.get_index(TIME), + TIME, + ) + self.combined_nc["biolume_avg_biolume"] = xr.DataArray( + orig_nc["avg_biolume"].to_numpy(), + coords=[lagged_time], + dims={f"{sensor}_{TIME}"}, + name=f"{sensor}_avg_biolume", + ) + self.combined_nc["biolume_avg_biolume"].attrs = { + "long_name": "Bioluminesence Average of 60Hz data", + "units": "photons s^-1", + "coordinates": f"{sensor}_{TIME} {sensor}_depth", + "comment": f"avg_biolume from {source} {lag_info}", + } + + lagged_time, lag_info = self._apply_plumbing_lag( + sensor, + orig_nc.get_index(TIME60HZ), + TIME60HZ, + ) + self.combined_nc["biolume_raw"] = xr.DataArray( + orig_nc["raw"].to_numpy(), + coords=[lagged_time], + dims={f"{sensor}_{TIME60HZ}"}, + name=f"{sensor}_raw", + ) + self.combined_nc["biolume_raw"].attrs = { + "long_name": "Raw 60 hz biolume data", + # xarray writes out its own units attribute + "coordinates": f"{sensor}_{TIME60HZ} {sensor}_depth60hz", + "comment": f"raw values from {source} {lag_info}", + } + if self.args.mission == "2010.284.00": + self.logger.info( + "Removing points outside of time range for %s/biolume.nc", self.args.mission + ) + for time_axis in (TIME, TIME60HZ): + self._range_qc_combined_nc( + instrument=sensor, + variables=[ + "biolume_time", + "biolume_time60hz", + "biolume_depth", + "biolume_flow", + "biolume_avg_biolume", + "biolume_raw", + ], + ranges={ + f"{sensor}_{time_axis}": Range( + pd.Timestamp(2010, 10, 11, 20, 0, 0), + pd.Timestamp(2010, 10, 12, 3, 28, 0), + ), + }, + set_to_nan=True, + ) + + def _lopc_process(self, sensor): + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.error("%s", e) # noqa: TRY400 + return + except AttributeError: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None + + source = self.sinfo[sensor]["data_filename"] + coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" + + # A lopc.nc file without a time variable will return a RangeIndex object + # from orig_nc.get_index('time') - test for presence of actual 'time' coordinate + if "time" not in orig_nc.coords: + error_message = ( + f"{sensor} has no time coordinate - likely an incomplete lopc.nc file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) + + self.combined_nc["lopc_countListSum"] = xr.DataArray( + orig_nc["countListSum"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_countListSum", + ) + self.combined_nc["lopc_countListSum"].attrs = { + "long_name": orig_nc["countListSum"].attrs["long_name"], + "units": orig_nc["countListSum"].attrs["units"], + "coordinates": coord_str, + "comment": f"Sum of countListSum values by size class from {source}", + } + + self.combined_nc["lopc_transCount"] = xr.DataArray( + orig_nc["transCount"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_transCount", + ) + self.combined_nc["lopc_transCount"].attrs = { + "long_name": orig_nc["transCount"].attrs["long_name"], + "units": orig_nc["transCount"].attrs["units"], + "coordinates": coord_str, + "comment": f"transCount from {source}", + } + + self.combined_nc["lopc_nonTransCount"] = xr.DataArray( + orig_nc["nonTransCount"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_nonTransCount", + ) + self.combined_nc["lopc_nonTransCount"].attrs = { + "long_name": orig_nc["nonTransCount"].attrs["long_name"], + "units": orig_nc["nonTransCount"].attrs["units"], + "coordinates": coord_str, + "comment": f"nonTransCount from {source}", + } + + self.combined_nc["lopc_LCcount"] = xr.DataArray( + orig_nc["LCcount"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_LCcount", + ) + self.combined_nc["lopc_LCcount"].attrs = { + "long_name": orig_nc["LCcount"].attrs["long_name"], + "units": orig_nc["LCcount"].attrs["units"], + "coordinates": coord_str, + "comment": f"LCcount from {source}", + } + + self.combined_nc["lopc_flowSpeed"] = xr.DataArray( + orig_nc["flowSpeed"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_flowSpeed", + ) + self.combined_nc["lopc_flowSpeed"].attrs = { + "long_name": orig_nc["flowSpeed"].attrs["long_name"], + "units": orig_nc["flowSpeed"].attrs["units"], + "coordinates": coord_str, + "comment": f"flowSpeed from {source}", + } + + def _isus_process(self, sensor): + try: + orig_nc = getattr(self, sensor).orig_data + except FileNotFoundError as e: + self.logger.error("%s", e) # noqa: TRY400 + return + except AttributeError: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None + + # Remove non-monotonic times + self.logger.debug("Checking for non-monotonic increasing times") + monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) + if (~monotonic).any(): + self.logger.debug( + "Removing non-monotonic increasing times at indices: %s", + np.argwhere(~monotonic).flatten(), + ) + orig_nc = orig_nc.sel(time=monotonic) + + source = self.sinfo[sensor]["data_filename"] + coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" + + self.combined_nc["isus_nitrate"] = xr.DataArray( + orig_nc["isusNitrate"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_nitrate", + ) + self.combined_nc["isus_nitrate"].attrs = { + "long_name": "Nitrate", + "units": "micromoles/liter", + "coordinates": coord_str, + "comment": f"isusNitrate from {source}", + } + self.combined_nc["isus_temp"] = xr.DataArray( + orig_nc["isusTemp"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_temp", + ) + self.combined_nc["isus_temp"].attrs = { + "long_name": "Temperature from ISUS", + "units": "Celsius", + "coordinates": coord_str, + "comment": f"isusTemp from {source}", + } + self.combined_nc["isus_quality"] = xr.DataArray( + orig_nc["isusQuality"].to_numpy(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_quality", + ) + self.combined_nc["isus_quality"].attrs = { + "long_name": "Fit Residuals from ISUS", + "units": "", + "coordinates": coord_str, + "comment": f"isusQuality from {source}", + } + + def _geometric_depth_correction(self, sensor, orig_nc): + """Performs the align_geom() function from the legacy Matlab. + Works for any sensor, but requires navigation being processed first + as its variables in combined_nc are required. Returns corrected depth + array. + """ + # Fix pitch values to first and last points for interpolation to time + # values outside the range of the pitch values. + # See https://stackoverflow.com/a/45446546 + # and https://github.com/scipy/scipy/issues/12707#issuecomment-672794335 + try: + p_interp = interp1d( + self.combined_nc["navigation_time"].to_numpy().tolist(), + self.combined_nc["navigation_pitch"].to_numpy(), + fill_value=( + self.combined_nc["navigation_pitch"].to_numpy()[0], + self.combined_nc["navigation_pitch"].to_numpy()[-1], + ), + bounds_error=False, + ) + except KeyError: + error_message = "No navigation_time or navigation_pitch in combined_nc." + raise EOFError(error_message) from None + pitch = p_interp(orig_nc["time"].to_numpy().tolist()) + + d_interp = interp1d( + self.combined_nc["depth_time"].to_numpy().tolist(), + self.combined_nc["depth_filtdepth"].to_numpy(), + fill_value=( + self.combined_nc["depth_filtdepth"].to_numpy()[0], + self.combined_nc["depth_filtdepth"].to_numpy()[-1], + ), + bounds_error=False, + ) + orig_depth = d_interp(orig_nc["time"].to_numpy().tolist()) + offs_depth = align_geom(self.sinfo[sensor]["sensor_offset"], pitch) + + corrected_depth = xr.DataArray( + (orig_depth - offs_depth).astype(np.float64).tolist(), + coords=[orig_nc.get_index("time")], + dims={f"{sensor}_time"}, + name=f"{sensor}_depth", + ) + # 2008.289.03 has self.combined_nc["depth_time"][-1] (2008-10-16T15:42:32) + # at lot less than orig_nc["time"][-1] (2008-10-16T16:24:43) + # which, with "extrapolate" causes wildly incorrect depths to -359 m + # There may be other cases where this happens, in which case we'd like + # a general solution. For now, we'll just correct this mission. + d_beg_time_diff = ( + orig_nc["time"].to_numpy()[0] - self.combined_nc["depth_time"].to_numpy()[0] + ) + d_end_time_diff = ( + orig_nc["time"].to_numpy()[-1] - self.combined_nc["depth_time"].to_numpy()[-1] + ) + self.logger.info( + "%s: d_beg_time_diff: %s, d_end_time_diff: %s", + sensor, + d_beg_time_diff.astype("timedelta64[s]"), + d_end_time_diff.astype("timedelta64[s]"), + ) + if self.args.mission in ( + "2008.289.03", + "2010.259.01", + "2010.259.02", + ): + # This could be a more general check for all missions, but let's restrict it + # to known problematic missions for now. The above info message can help + # determine if this is needed for other missions. + self.logger.info( + "%s: Special QC for mission %s: Setting corrected_depth to NaN for times after %s", + sensor, + self.args.mission, + self.combined_nc["depth_time"][-1].to_numpy(), + ) + corrected_depth[ + np.where( + orig_nc.get_index("time") > self.combined_nc["depth_time"].to_numpy()[-1], + ) + ] = np.nan + if self.args.plot: + plt.figure(figsize=(18, 6)) + plt.plot( + orig_nc["time"].to_numpy(), + orig_depth, + "-", + orig_nc["time"].to_numpy(), + corrected_depth, + "--", + orig_nc["time"].to_numpy(), + pitch, + ".", + ) + plt.ylabel("Depth (m) & Pitch (deg)") + plt.legend(("Original depth", "Pitch corrected depth", "Pitch")) + plt.title( + f"Original and pitch corrected depth for {self.args.auv_name} {self.args.mission}", + ) + plt.show() + + return corrected_depth + + def _process(self, sensor, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 + coeffs = None + try: + coeffs = getattr(self, sensor).cals + except AttributeError as e: + self.logger.debug("No calibration information for %s: %s", sensor, e) + + if sensor == "navigation": + self._navigation_process(sensor) + elif sensor == "gps": + self._gps_process(sensor) + elif sensor == "depth": + self._depth_process(sensor) + elif sensor == "ecopuck": + self._ecopuck_process(sensor, coeffs) + elif sensor == "hs2": + self._hs2_process(sensor, logs_dir) + elif sensor == "tailcone": + self._tailcone_process(sensor) + elif sensor == "lopc": + self._lopc_process(sensor) + elif sensor == "isus": + self._isus_process(sensor) + elif sensor in ("ctd1", "ctd2", "seabird25p"): + if coeffs is not None: + self._ctd_process(logs_dir, sensor, coeffs) + elif hasattr(getattr(self, sensor), "orig_data"): + self.logger.warning("No calibration information for %s", sensor) + elif sensor == "biolume": + self._biolume_process(sensor) + elif hasattr(getattr(self, sensor), "orig_data"): + self.logger.warning("No method (yet) to process %s", sensor) + + def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: + name = name or self.args.mission + vehicle = vehicle or self.args.auv_name + self.combined_nc.attrs = self.global_metadata() + out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_cal.nc") + self.logger.info("Writing calibrated instrument data to %s", out_fn) + if Path(out_fn).exists(): + Path(out_fn).unlink() + self.combined_nc.to_netcdf(out_fn) + self.logger.info( + "Data variables written: %s", + ", ".join(sorted(self.combined_nc.variables)), + ) + + def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = True) -> None: # noqa: FBT001, FBT002 + name = name or self.args.mission + vehicle = vehicle or self.args.auv_name + logs_dir = Path(self.args.base_path, vehicle, MISSIONLOGS, name) + netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) + start_datetime = datetime.strptime(".".join(name.split(".")[:2]), "%Y.%j").astimezone( + UTC, + ) + self._define_sensor_info(start_datetime) + self._read_data(logs_dir, netcdfs_dir) + self.combined_nc = xr.Dataset() + + for sensor in self.sinfo: + if not process_gps and sensor == "gps": + continue # to skip gps processing in conftest.py fixture + getattr(self, sensor).cal_align_data = xr.Dataset() + self.logger.debug("Processing %s %s %s", vehicle, name, sensor) + try: + self._process(sensor, logs_dir, netcdfs_dir) + except EOFError as e: + short_name = vehicle.lower() + if vehicle == "Dorado389": + # For supporting pytest & conftest.py fixture + short_name = "dorado" + if sensor in EXPECTED_SENSORS[short_name]: + self.logger.error("Error processing %s: %s", sensor, e) # noqa: TRY400 + else: + self.logger.debug("Error processing %s: %s", sensor, e) + except ValueError: + self.logger.exception("Error processing %s", sensor) + except KeyError as e: + self.logger.error("Error processing %s: missing variable %s", sensor, e) # noqa: TRY400 + + return netcdfs_dir + + def process_command_line(self): + examples = "Examples:" + "\n\n" + examples += " Calibrate original data for some missions:\n" + examples += " " + sys.argv[0] + " --mission 2020.064.10\n" + examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n" + + parser = argparse.ArgumentParser( + formatter_class=RawTextHelpFormatter, + description=__doc__, + epilog=examples, + ) + + parser.add_argument( + "--base_path", + action="store", + default=BASE_PATH, + help=f"Base directory for missionlogs and missionnetcdfs, default: {BASE_PATH}", + ) + parser.add_argument( + "--auv_name", + action="store", + default="Dorado389", + help="Dorado389 (default), i2MAP, or Multibeam", + ) + parser.add_argument( + "--mission", + action="store", + help="Mission directory, e.g.: 2020.064.10", + ) + parser.add_argument( + "--noinput", + action="store_true", + help="Execute without asking for a response, e.g. to not ask to re-download file", + ) + parser.add_argument( + "--plot", + action="store", + help="Create intermediate plots" + " to validate data operations. Use first to plot " + " points, e.g. first2000. Program blocks upon show.", + ) + parser.add_argument( + "-v", + "--verbose", + type=int, + choices=range(3), + action="store", + default=0, + const=1, + nargs="?", + help="verbosity level: " + + ", ".join( + [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], + ), + ) + + self.args = parser.parse_args() + self.logger.setLevel(self._log_levels[self.args.verbose]) + + self.commandline = " ".join(sys.argv) + + +if __name__ == "__main__": + cal_netcdf = Calibrate_NetCDF() + cal_netcdf.process_command_line() + cal_netcdf.calibration_dir = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" + p_start = time.time() + # Set process_gps=False to skip time consuming _nudge_pos() processing + # netcdf_dir = cal_netcdf.process_logs(process_gps=False) + netcdf_dir = cal_netcdf.process_logs() + cal_netcdf.write_netcdf(netcdf_dir) + cal_netcdf.logger.info("Time to process: %.2f seconds", (time.time() - p_start)) From d749bf97d83f94c53baca37ae8bf6a308cf679fe Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 15 Oct 2025 12:26:54 -0700 Subject: [PATCH 011/121] Refactor extract_groups_to_files_netcdf4() into more readable methods. --- src/data/nc42netcdfs.py | 290 ++++++++++++++++++++-------------------- 1 file changed, 148 insertions(+), 142 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 109f224c..eda7ae2d 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -13,6 +13,7 @@ import os import sys from pathlib import Path +from typing import Any import netCDF4 import pooch @@ -180,165 +181,170 @@ def get_groups_netcdf4(self, file_path): with netCDF4.Dataset(file_path, "r") as dataset: return list(dataset.groups.keys()) - def extract_groups_to_files_netcdf4(self, input_file, output_dir): # noqa: C901, PLR0912, PLR0915 + def extract_groups_to_files_netcdf4(self, input_file, output_dir): """Extract each group to a separate NetCDF file using netCDF4 library. + The xarray library fails reading the WetLabsBB2FL group from this file: brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 with garbled data for the serial variable (using ncdump): serial = "$F!{<8D>\031@7\024[P]\001\030" ; - but netCDF4 can skip over it and read the rest of the variables.""" + but netCDF4 can skip over it and read the rest of the variables. + """ output_dir = Path(output_dir) output_dir.mkdir(exist_ok=True, parents=True) - # Read variables from the "/" (root) group and save them to a file named "Universals.nc" - with netCDF4.Dataset(input_file, "r") as src_dataset: - root_group = src_dataset - root_parms = SCIENG_PARMS.get("/", []) - if root_parms: - try: - self.logger.info("Extracting root group '/'") - # Get variables to extract - parms = [p["name"] for p in root_parms if "name" in p] - self.logger.debug(" Variables to extract: %s", parms) - - # Check which variables actually exist in the group - available_vars = list(root_group.variables.keys()) - vars_to_extract = [var for var in parms if var in available_vars] - - if vars_to_extract: - output_file = output_dir / "Universals.nc" - # Need to use NETCDF4 as we have multiple unlimited dimensions - with netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: - # Copy global attributes from source group - for attr_name in root_group.ncattrs(): - dst_dataset.setncattr(attr_name, root_group.getncattr(attr_name)) - - # Copy dimensions that are used by the variables we want - dims_needed = set() - for var_name in vars_to_extract: - var = root_group.variables[var_name] - dims_needed.update(var.dimensions) - - for dim_name in dims_needed: - if dim_name in root_group.dimensions: - src_dim = root_group.dimensions[dim_name] - size = len(src_dim) if not src_dim.isunlimited() else None - dst_dataset.createDimension(dim_name, size) - - # Copy coordinate variables first (if they exist) - coord_vars = [] - for dim_name in dims_needed: - if dim_name in root_group.variables: - coord_vars.append(dim_name) # noqa: PERF401 - - # Copy coordinate variables - for var_name in coord_vars: - if var_name not in vars_to_extract: - self._copy_variable(root_group, dst_dataset, var_name) - - # Copy requested variables - for var_name in vars_to_extract: - self._copy_variable(root_group, dst_dataset, var_name) - - self.logger.info("Extracted root group '/' to %s", output_file) - else: - self.logger.warning("No requested variables found in root group '/'") - except (FileNotFoundError, OSError, ValueError) as e: - self.logger.warning("Could not extract root group '/': %s", e) - except KeyError as e: - self.logger.warning("Variable %s not found in root group '/'", e) + self.logger.info("Extracting data from %s", input_file) with netCDF4.Dataset(input_file, "r") as src_dataset: + # Extract root group first + self._extract_root_group(src_dataset, output_dir) + + # Extract all other groups all_groups = list(src_dataset.groups.keys()) + for group_name in SCIENG_PARMS: + if group_name == "/" or group_name not in all_groups: + if group_name != "/" and group_name not in all_groups: + self.logger.warning("Group %s not found in %s", group_name, input_file) + continue + self._extract_single_group(src_dataset, group_name, output_dir) - self.logger.info("Extracting data from %s", input_file) + def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path): + """Extract variables from the root group to Universals.nc.""" + root_parms = SCIENG_PARMS.get("/", []) + if not root_parms: + return - for group_name, group_parms in SCIENG_PARMS.items(): - if group_name not in all_groups: - self.logger.warning("Group %s not found in %s", group_name, input_file) - continue + try: + self.logger.info("Extracting root group '/'") + vars_to_extract = self._get_available_variables(src_dataset, root_parms) - try: - self.logger.info(" Group %s", group_name) - src_group = src_dataset.groups[group_name] - - # Get variables to extract - parms = [p["name"] for p in group_parms if "name" in p] - self.logger.debug(" Variables to extract: %s", parms) - - # Check which variables actually exist in the group - available_vars = list(src_group.variables.keys()) - vars_to_extract = [var for var in parms if var in available_vars] - - if not vars_to_extract: - self.logger.warning("No requested variables found in group %s", group_name) - continue - - # Create output file - output_file = output_dir / f"{group_name}.nc" - - with netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: - # Copy global attributes from source group - for attr_name in src_group.ncattrs(): - dst_dataset.setncattr(attr_name, src_group.getncattr(attr_name)) - - # Copy dimensions that are used by the variables we want - dims_needed = set() - for var_name in vars_to_extract: - var = src_group.variables[var_name] - dims_needed.update(var.dimensions) - - for dim_name in dims_needed: - if dim_name in src_group.dimensions: - src_dim = src_group.dimensions[dim_name] - size = len(src_dim) if not src_dim.isunlimited() else None - dst_dataset.createDimension(dim_name, size) - - # Copy coordinate variables first (if they exist) - coord_vars = [] - for dim_name in dims_needed: - if dim_name in src_group.variables: - coord_vars.append(dim_name) # noqa: PERF401 - - # Copy coordinate variables - for var_name in coord_vars: - if var_name not in vars_to_extract: - self._copy_variable(src_group, dst_dataset, var_name) - - # Copy requested variables - for var_name in vars_to_extract: - self._copy_variable(src_group, dst_dataset, var_name) - - self.logger.info("Extracted %s to %s", group_name, output_file) - - except (FileNotFoundError, OSError, ValueError) as e: - self.logger.warning("Could not extract %s: %s", group_name, e) - except KeyError as e: - self.logger.warning("Variable %s not found in group %s", e, group_name) - - def _copy_variable(self, src_group, dst_dataset, var_name): - """Helper method to copy a variable from source to destination.""" - src_var = src_group.variables[var_name] - - # Create variable in destination - dst_var = dst_dataset.createVariable( - var_name, - src_var.dtype, - src_var.dimensions, - zlib=True, # Enable compression - complevel=6, - shuffle=True, - fletcher32=True, - ) + if vars_to_extract: + output_file = output_dir / "Universals.nc" + self._create_netcdf_file(src_dataset, vars_to_extract, output_file) + self.logger.info("Extracted root group '/' to %s", output_file) + else: + self.logger.warning("No requested variables found in root group '/'") + + except Exception as e: # noqa: BLE001 + self.logger.warning("Could not extract root group '/': %s", e) + + def _extract_single_group( + self, src_dataset: netCDF4.Dataset, group_name: str, output_dir: Path + ): + """Extract a single group to its own NetCDF file.""" + group_parms = SCIENG_PARMS[group_name] - # Copy data - dst_var[:] = src_var[:] + try: + self.logger.info(" Group %s", group_name) + src_group = src_dataset.groups[group_name] - # Copy variable attributes - for attr_name in src_var.ncattrs(): - dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + vars_to_extract = self._get_available_variables(src_group, group_parms) - self.logger.debug(" Copied variable: %s", var_name) + if vars_to_extract: + output_file = output_dir / f"{group_name}.nc" + self._create_netcdf_file(src_group, vars_to_extract, output_file) + self.logger.info("Extracted %s to %s", group_name, output_file) + else: + self.logger.warning("No requested variables found in group %s", group_name) + + except KeyError: + self.logger.warning("Group %s not found", group_name) + except Exception as e: # noqa: BLE001 + self.logger.warning("Could not extract %s: %s", group_name, e) + + def _get_available_variables( + self, src_group: netCDF4.Group, group_parms: list[dict[str, Any]] + ) -> list[str]: + """Get the intersection of requested and available variables.""" + requested_vars = [p["name"] for p in group_parms if "name" in p] + available_vars = list(src_group.variables.keys()) + vars_to_extract = [var for var in requested_vars if var in available_vars] + + self.logger.debug(" Variables to extract: %s", vars_to_extract) + return vars_to_extract + + def _create_netcdf_file( + self, src_group: netCDF4.Group, vars_to_extract: list[str], output_file: Path + ): + """Create a new NetCDF file with the specified variables.""" + with netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: + # Copy global attributes + self._copy_global_attributes(src_group, dst_dataset) + + # Create dimensions + dims_needed = self._get_required_dimensions(src_group, vars_to_extract) + self._create_dimensions(src_group, dst_dataset, dims_needed) + + # Copy coordinate variables + coord_vars = self._get_coordinate_variables(src_group, dims_needed, vars_to_extract) + for var_name in coord_vars: + self._copy_variable(src_group, dst_dataset, var_name) + + # Copy requested variables + for var_name in vars_to_extract: + self._copy_variable(src_group, dst_dataset, var_name) + + def _copy_global_attributes(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset): + """Copy global attributes from source to destination.""" + for attr_name in src_group.ncattrs(): + dst_dataset.setncattr(attr_name, src_group.getncattr(attr_name)) + + def _get_required_dimensions( + self, src_group: netCDF4.Group, vars_to_extract: list[str] + ) -> set[str]: + """Get all dimensions needed by the variables to extract.""" + dims_needed = set() + for var_name in vars_to_extract: + if var_name in src_group.variables: + var = src_group.variables[var_name] + dims_needed.update(var.dimensions) + return dims_needed + + def _create_dimensions( + self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, dims_needed: set[str] + ): + """Create dimensions in the destination dataset.""" + for dim_name in dims_needed: + if dim_name in src_group.dimensions: + src_dim = src_group.dimensions[dim_name] + size = len(src_dim) if not src_dim.isunlimited() else None + dst_dataset.createDimension(dim_name, size) + + def _get_coordinate_variables( + self, src_group: netCDF4.Group, dims_needed: set[str], vars_to_extract: list[str] + ) -> list[str]: + """Get coordinate variables that aren't already in vars_to_extract.""" + coord_vars = [] + for dim_name in dims_needed: + if dim_name in src_group.variables and dim_name not in vars_to_extract: + coord_vars.append(dim_name) # noqa: PERF401 + return coord_vars + + def _copy_variable(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, var_name: str): + """Helper method to copy a variable from source to destination.""" + try: + src_var = src_group.variables[var_name] + + # Create variable in destination + dst_var = dst_dataset.createVariable( + var_name, + src_var.dtype, + src_var.dimensions, + zlib=True, + complevel=6, + shuffle=True, + fletcher32=True, + ) + + # Copy data and attributes + dst_var[:] = src_var[:] + for attr_name in src_var.ncattrs(): + dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + + self.logger.debug(" Copied variable: %s", var_name) + + except Exception as e: # noqa: BLE001 + self.logger.warning("Failed to copy variable %s: %s", var_name, e) def extract_groups_to_files(self, input_file, output_dir): """Extract each group to a separate NetCDF file.""" From 405d8bca83518bb65b6197915c4b948d7b13408c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 15 Oct 2025 12:27:49 -0700 Subject: [PATCH 012/121] WIP: Begin changing for use with LRAUV data. --- src/data/combine.py | 60 +++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 704597e4..0a53bf69 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -1,31 +1,29 @@ #!/usr/bin/env python """ -Calibrate original data and produce NetCDF file for mission - -Read original data from netCDF files created by logs2netcdfs.py, apply -calibration information in .cfg and .xml files associated with the -original .log files and write out a single netCDF file with the important -variables at original sampling intervals. Geometric alignment and plumbing lag -corrections are also done during this step. The file will contain combined -variables (the combined_nc member variable) and be analogous to the original -netCDF4 files produced by MBARI's LRAUVs. Rather than using groups in netCDF-4 -the data will be written in classic netCDF-CF with a naming syntax that mimics -the LRAUV group naming convention with the coordinates for each sensor: +Combine original LRAUV data from separate .nc files and produce a single NetCDF +file that also contains corrected (nudged) latitudes and longitudes. + +Read original data from netCDF files created by nc42netcdfs.py and write out a +single netCDF file with the important variables at original sampling intervals. +Geometric alignment and any plumbing lag corrections are also done during this +step. The file will contain combined variables (the combined_nc member variable) +and be analogous to the original netCDF4. Rather than using groups in netCDF-4 +the data will be written in classic netCDF-CF with a naming convention that is +similar to Dorado data, with group names (without underscores) preceeding the +variable name with an underscore: ``` - _ - _<..........> - _ - _time - _depth - _latitude - _longitude + _ + _<..........> + _ + _time + _depth + _latitude + _longitude ``` -Note: The name "sensor" is used here, but it's really more aligned -with the concept of "instrument" in SSDS parlance. """ __author__ = "Mike McCann" -__copyright__ = "Copyright 2020, Monterey Bay Aquarium Research Institute" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" import argparse import logging @@ -73,9 +71,19 @@ class Range(NamedTuple): max: float -# Using lower case vehicle names, modify in _define_sensor_info() for changes over time -# Used to reduce ERROR & WARNING log messages for expected missing sensor data -EXPECTED_SENSORS = { +# Using lower case vehicle names, modify in _define_sensor_info() for changes +# over time Used to reduce ERROR & WARNING log messages for expected missing +# sensor data. There are core data common to most all vehicles, whose groups +# are listed in BASE_GROUPS. EXPECTED_GROUPS contains additional groups for +# specific vehicles. +BASE_GROUPS = { + "lrauv": [ + "CTDSeabird", + "WetLabsBB2FL", + ], +} + +EXPECTED_GROUPS = { "dorado": [ "navigation", "gps", @@ -99,7 +107,7 @@ class Range(NamedTuple): ], } # Used in test fixture in conftetst.py -EXPECTED_SENSORS["Dorado389"] = EXPECTED_SENSORS["dorado"] +EXPECTED_GROUPS["Dorado389"] = EXPECTED_GROUPS["dorado"] def align_geom(sensor_offset, pitches): @@ -3538,7 +3546,7 @@ def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = Tr if vehicle == "Dorado389": # For supporting pytest & conftest.py fixture short_name = "dorado" - if sensor in EXPECTED_SENSORS[short_name]: + if sensor in EXPECTED_GROUPS[short_name]: self.logger.error("Error processing %s: %s", sensor, e) # noqa: TRY400 else: self.logger.debug("Error processing %s: %s", sensor, e) From b2858955851f9548a04c6b32f4b61a76d1cebc30 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 15 Oct 2025 15:17:33 -0700 Subject: [PATCH 013/121] Factor nudge_positions() out of calibrate.py so that combine.py can use it. --- src/data/AUV.py | 354 +++++++++++++++++++++++++++++++++++++++++- src/data/calibrate.py | 306 +++--------------------------------- src/data/combine.py | 307 +++--------------------------------- 3 files changed, 386 insertions(+), 581 deletions(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index ba1fa8fa..75accb25 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -8,11 +8,12 @@ MBARI 30 March 2020 """ -import sys +import logging from datetime import UTC, datetime import coards import numpy as np +import xarray as xr def monotonic_increasing_time_indices(time_array: np.array) -> np.ndarray: @@ -48,9 +49,350 @@ def add_global_metadata(self): ) self.nc_file.distribution_statement = "Any use requires prior approval from MBARI" - self.nc_file.license = self.nc_file.distribution_statement - self.nc_file.useconst = "Not intended for legal use. Data may contain inaccuracies." - self.nc_file.history = 'Created by "{}" on {}'.format( - " ".join(sys.argv), - iso_now, + + +def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 + nav_longitude: xr.DataArray, + nav_latitude: xr.DataArray, + gps_longitude: xr.DataArray, + gps_latitude: xr.DataArray, + logger: logging.Logger, + auv_name: str = "", + mission: str = "", + max_sec_diff_at_end: int = 10, + create_plots: bool = False, # noqa: FBT001, FBT002 +) -> tuple[xr.DataArray, xr.DataArray, int, float]: + """ + Apply linear nudges to underwater latitudes and longitudes so that + they match the surface GPS positions. + + Parameters: + ----------- + nav_longitude : xr.DataArray + Navigation longitude data (dead reckoned) + nav_latitude : xr.DataArray + Navigation latitude data (dead reckoned) + gps_longitude : xr.DataArray + GPS longitude fixes + gps_latitude : xr.DataArray + GPS latitude fixes + logger : logging.Logger + Logger for output messages + auv_name : str, optional + AUV name for plot titles + mission : str, optional + Mission name for plot titles + max_sec_diff_at_end : int, optional + Maximum allowable time difference at segment end (default: 10) + create_plots : bool, optional + Whether to create debug plots (default: False) + + Returns: + -------- + tuple[xr.DataArray, xr.DataArray, int, float] + nudged_longitude, nudged_latitude, segment_count, segment_minsum + """ + segment_count = None + segment_minsum = None + + lon = nav_longitude + lat = nav_latitude + lon_fix = gps_longitude + lat_fix = gps_latitude + + logger.info( + f"{'seg#':5s} {'end_sec_diff':12s} {'end_lon_diff':12s} {'end_lat_diff':12s}" # noqa: G004 + f" {'len(segi)':9s} {'seg_min':>9s} {'u_drift (cm/s)':14s} {'v_drift (cm/s)':14s}" + f" {'start datetime of segment':>29}", + ) + + # Any dead reckoned points before first GPS fix - usually empty + # as GPS fix happens before dive + segi = np.where(lat.cf["T"].data < lat_fix.cf["T"].data[0])[0] + if lon[:][segi].any(): + lon_nudged_array = lon[segi] + lat_nudged_array = lat[segi] + dt_nudged = lon.get_index("navigation_time")[segi] + logger.debug( + "Filled _nudged arrays with %d values starting at %s " + "which were before the first GPS fix at %s", + len(segi), + lat.get_index("navigation_time")[0], + lat_fix.get_index("gps_time")[0], + ) + else: + lon_nudged_array = np.array([]) + lat_nudged_array = np.array([]) + dt_nudged = np.array([], dtype="datetime64[ns]") + if segi.any(): + seg_min = ( + lat.get_index("navigation_time")[segi][-1] - lat.get_index("navigation_time")[segi][0] + ).total_seconds() / 60 + else: + seg_min = 0 + logger.info( + f"{' ':5} {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14} {'-':>29}", # noqa: E501, G004 + ) + + MIN_SEGMENT_LENGTH = 10 + seg_count = 0 + seg_minsum = 0 + for i in range(len(lat_fix) - 1): + # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes + segi = np.where( + np.logical_and( + lat.cf["T"].data > lat_fix.cf["T"].data[i], + lat.cf["T"].data < lat_fix.cf["T"].data[i + 1], + ), + )[0] + if not segi.any(): + logger.debug( + f"No dead reckoned values found between GPS times of " # noqa: G004 + f"{lat_fix.cf['T'].data[i]} and {lat_fix.cf['T'].data[i + 1]}", + ) + continue + + end_sec_diff = float(lat_fix.cf["T"].data[i + 1] - lat.cf["T"].data[segi[-1]]) / 1.0e9 + + end_lon_diff = float(lon_fix[i + 1]) - float(lon[segi[-1]]) + end_lat_diff = float(lat_fix[i + 1]) - float(lat[segi[-1]]) + + # Compute approximate horizontal drift rate as a sanity check + try: + u_drift = ( + end_lon_diff + * float(np.cos(lat_fix[i + 1] * np.pi / 180)) + * 60 + * 185300 + / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) + ) + except ZeroDivisionError: + u_drift = 0 + try: + v_drift = ( + end_lat_diff + * 60 + * 185300 + / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) + ) + except ZeroDivisionError: + v_drift = 0 + + if abs(end_lon_diff) > 1 or abs(end_lat_diff) > 1: + # Error handling - same as original + logger.info( + f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" + f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", + ) + logger.error( + "End of underwater segment dead reckoned position is too different " + "from GPS fix: abs(end_lon_diff) (%s) > 1 or abs(end_lat_diff) (%s) > 1", + end_lon_diff, + end_lat_diff, + ) + logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s %s", + auv_name, + mission, + ) + error_message = ( + f"abs(end_lon_diff) ({end_lon_diff}) > 1 or abs(end_lat_diff) ({end_lat_diff}) > 1" + ) + raise ValueError(error_message) + if abs(end_sec_diff) > max_sec_diff_at_end: + logger.warning( + "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", + end_sec_diff, + max_sec_diff_at_end, + ) + logger.info( + "Overriding end_lon_diff (%s) and end_lat_diff (%s) by setting them to 0", + end_lon_diff, + end_lat_diff, + ) + end_lon_diff = 0 + end_lat_diff = 0 + + seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 + seg_minsum += seg_min + + if len(segi) > MIN_SEGMENT_LENGTH: + logger.info( + f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" + f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", + ) + + # Start with zero adjustment at beginning and linearly ramp up to the diff at the end + lon_nudge = np.interp( + lon.cf["T"].data[segi].astype(np.int64), + [ + lon.cf["T"].data[segi].astype(np.int64)[0], + lon.cf["T"].data[segi].astype(np.int64)[-1], + ], + [0, end_lon_diff], ) + lat_nudge = np.interp( + lat.cf["T"].data[segi].astype(np.int64), + [ + lat.cf["T"].data[segi].astype(np.int64)[0], + lat.cf["T"].data[segi].astype(np.int64)[-1], + ], + [0, end_lat_diff], + ) + + # Sanity checks + MAX_LONGITUDE = 180 + MAX_LATITUDE = 90 + if ( + np.max(np.abs(lon[segi] + lon_nudge)) > MAX_LONGITUDE + or np.max(np.abs(lat[segi] + lon_nudge)) > MAX_LATITUDE + ): + logger.warning( + "Nudged coordinate is way out of reasonable range - segment %d", + seg_count, + ) + logger.warning( + " max(abs(lon)) = %s", + np.max(np.abs(lon[segi] + lon_nudge)), + ) + logger.warning( + " max(abs(lat)) = %s", + np.max(np.abs(lat[segi] + lat_nudge)), + ) + + lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) + lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) + dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) + seg_count += 1 + + # Any dead reckoned points after last GPS fix + segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] + seg_min = 0 + if segi.any(): + lon_nudged_array = np.append(lon_nudged_array, lon[segi]) + lat_nudged_array = np.append(lat_nudged_array, lat[segi]) + dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) + seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 + + logger.info( + f"{seg_count + 1:4d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 + ) + segment_count = seg_count + segment_minsum = seg_minsum + + logger.info("Points in final series = %d", len(dt_nudged)) + + lon_nudged = xr.DataArray( + data=lon_nudged_array, + dims=["time"], + coords={"time": dt_nudged}, + name="longitude", + ) + lat_nudged = xr.DataArray( + data=lat_nudged_array, + dims=["time"], + coords={"time": dt_nudged}, + name="latitude", + ) + + # Optional plotting code + if create_plots: + _create_nudge_plots( + lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger + ) + + return lon_nudged, lat_nudged, segment_count, segment_minsum + + +def _create_nudge_plots( # noqa: PLR0913 + lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger +): + """Create debug plots for position nudging (separated for clarity).""" + try: + import matplotlib.pyplot as plt + + try: + import cartopy.crs as ccrs # type: ignore # noqa: I001, PGH003 + from matplotlib import patches + from shapely.geometry import LineString # type: ignore # noqa: PGH003 + + has_cartopy = True + except ImportError: + has_cartopy = False + + # Time series plots + fig, axes = plt.subplots(nrows=2, figsize=(18, 6)) + axes[0].plot(lat_nudged.coords["time"].data, lat_nudged, "-") + axes[0].plot(lat.cf["T"].data, lat, "--") + axes[0].plot(lat_fix.cf["T"].data, lat_fix, "*") + axes[0].set_ylabel("Latitude") + axes[0].legend(["Nudged", "Original", "GPS Fixes"]) + axes[1].plot(lon_nudged.coords["time"].data, lon_nudged, "-") + axes[1].plot(lon.cf["T"].data, lon, "--") + axes[1].plot(lon_fix.cf["T"].data, lon_fix, "*") + axes[1].set_ylabel("Longitude") + axes[1].legend(["Nudged", "Original", "GPS Fixes"]) + title = "Corrected nav from nudge_positions()" + fig.suptitle(title) + axes[0].grid() + axes[1].grid() + logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) + plt.show() + + # Map plot + if has_cartopy: + ax = plt.axes(projection=ccrs.PlateCarree()) + nudged = LineString(zip(lon_nudged.to_numpy(), lat_nudged.to_numpy(), strict=False)) + original = LineString(zip(lon.to_numpy(), lat.to_numpy(), strict=False)) + ax.add_geometries( + [nudged], + crs=ccrs.PlateCarree(), + edgecolor="red", + facecolor="none", + label="Nudged", + ) + ax.add_geometries( + [original], + crs=ccrs.PlateCarree(), + edgecolor="grey", + facecolor="none", + label="Original", + ) + handle_gps = ax.scatter( + lon_fix.to_numpy(), + lat_fix.to_numpy(), + color="green", + label="GPS Fixes", + ) + bounds = nudged.buffer(0.02).bounds + extent = bounds[0], bounds[2], bounds[1], bounds[3] + ax.set_extent(extent, crs=ccrs.PlateCarree()) + ax.coastlines() + + handle_nudged = patches.Rectangle((0, 0), 1, 0.1, facecolor="red") + handle_original = patches.Rectangle((0, 0), 1, 0.1, facecolor="gray") + ax.legend( + [handle_nudged, handle_original, handle_gps], + ["Nudged", "Original", "GPS Fixes"], + ) + ax.gridlines( + crs=ccrs.PlateCarree(), + draw_labels=True, + linewidth=1, + color="gray", + alpha=0.5, + ) + ax.set_title(f"{auv_name} {mission}") + logger.debug( + "Pausing map plot (doesn't work well in VS Code debugger)." + " Close window to continue.", + ) + plt.show() + else: + logger.warning("No map plot, could not import cartopy") + + except ImportError: + logger.warning("Could not create plots - matplotlib not available") diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 704597e4..2cdc8941 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -27,7 +27,7 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2020, Monterey Bay Aquarium Research Institute" -import argparse +import argparse # noqa: I001 import logging import os import shlex @@ -50,19 +50,11 @@ from scipy.interpolate import interp1d from seawater import eos80 -try: - import cartopy.crs as ccrs # type: ignore # noqa: PGH003 - from shapely.geometry import LineString # type: ignore # noqa: PGH003 -except ModuleNotFoundError: - # cartopy is not installed, will not be able to plot maps - pass - import pandas as pd import pyproj -from AUV import monotonic_increasing_time_indices +from AUV import monotonic_increasing_time_indices, nudge_positions from hs2_proc import compute_backscatter, hs2_calc_bb, hs2_read_cal_file from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ, AUV_NetCDF -from matplotlib import patches from scipy import signal AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay @@ -1661,13 +1653,10 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 }, ) - def _nudge_pos(self, max_sec_diff_at_end=10): # noqa: C901, PLR0912, PLR0915 + def _nudge_pos(self, max_sec_diff_at_end=10): """Apply linear nudges to underwater latitudes and longitudes so that they match the surface gps positions. """ - self.segment_count = None - self.segment_minsum = None - try: lon = self.combined_nc["navigation_longitude"] except KeyError: @@ -1677,279 +1666,22 @@ def _nudge_pos(self, max_sec_diff_at_end=10): # noqa: C901, PLR0912, PLR0915 lon_fix = self.combined_nc["gps_longitude"] lat_fix = self.combined_nc["gps_latitude"] - self.logger.info( - f"{'seg#':5s} {'end_sec_diff':12s} {'end_lon_diff':12s} {'end_lat_diff':12s}" # noqa: G004 - f" {'len(segi)':9s} {'seg_min':>9s} {'u_drift (cm/s)':14s} {'v_drift (cm/s)':14s}" - f" {'start datetime of segment':>29}", - ) - - # Any dead reckoned points before first GPS fix - usually empty - # as GPS fix happens before dive - segi = np.where(lat.cf["T"].data < lat_fix.cf["T"].data[0])[0] - if lon[:][segi].any(): - lon_nudged_array = lon[segi] - lat_nudged_array = lat[segi] - dt_nudged = lon.get_index("navigation_time")[segi] - self.logger.debug( - "Filled _nudged arrays with %d values starting at %s " - "which were before the first GPS fix at %s", - len(segi), - lat.get_index("navigation_time")[0], - lat_fix.get_index("gps_time")[0], - ) - else: - lon_nudged_array = np.array([]) - lat_nudged_array = np.array([]) - dt_nudged = np.array([], dtype="datetime64[ns]") - if segi.any(): - seg_min = ( - lat.get_index("navigation_time")[segi][-1] - - lat.get_index("navigation_time")[segi][0] - ).total_seconds() / 60 - else: - seg_min = 0 - self.logger.info( - f"{' ':5} {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14} {'-':>29}", # noqa: E501, G004 - ) - - seg_count = 0 - seg_minsum = 0 - for i in range(len(lat_fix) - 1): - # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes - segi = np.where( - np.logical_and( - lat.cf["T"].data > lat_fix.cf["T"].data[i], - lat.cf["T"].data < lat_fix.cf["T"].data[i + 1], - ), - )[0] - if not segi.any(): - self.logger.debug( - f"No dead reckoned values found between GPS times of " # noqa: G004 - f"{lat_fix.cf['T'].data[i]} and {lat_fix.cf['T'].data[i + 1]}", - ) - continue - - end_sec_diff = float(lat_fix.cf["T"].data[i + 1] - lat.cf["T"].data[segi[-1]]) / 1.0e9 - - end_lon_diff = float(lon_fix[i + 1]) - float(lon[segi[-1]]) - end_lat_diff = float(lat_fix[i + 1]) - float(lat[segi[-1]]) - - # Compute approximate horizontal drift rate as a sanity check - try: - u_drift = ( - end_lon_diff - * float(np.cos(lat_fix[i + 1] * np.pi / 180)) - * 60 - * 185300 - / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) - ) - except ZeroDivisionError: - u_drift = 0 - try: - v_drift = ( - end_lat_diff - * 60 - * 185300 - / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) - ) - except ZeroDivisionError: - v_drift = 0 - - if abs(end_lon_diff) > 1 or abs(end_lat_diff) > 1: - # It's a problem if we have more than 1 degree difference at the end of the segment. - # This is usually because the GPS fix is bad, but sometimes it's because the - # dead reckoned position is bad. Or sometimes it's both as in dorado 2016.384.00. - # Early QC by calling _range_qc_combined_nc() can remove the bad points. - # Monterey Bay missions that have bad points can be added to the lists in - # _navigation_process() and/or _gps_process(). - self.logger.info( - f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 - f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" - f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", - ) - self.logger.error( - "End of underwater segment dead reckoned position is too different " - "from GPS fix: abs(end_lon_diff) (%s) > 1 or abs(end_lat_diff) (%s) > 1", - end_lon_diff, - end_lat_diff, - ) - self.logger.info( - "Fix this error by calling _range_qc_combined_nc() in " - "_navigation_process() and/or _gps_process() for %s %s", - self.args.auv_name, - self.args.mission, - ) - error_message = ( - f"abs(end_lon_diff) ({end_lon_diff}) > 1 or " - f"abs(end_lat_diff) ({end_lat_diff}) > 1" - ) - raise ValueError(error_message) - if abs(end_sec_diff) > max_sec_diff_at_end: - # Happens in dorado 2016.348.00 because of a bad GPS fixes being removed - self.logger.warning( - "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", - end_sec_diff, - max_sec_diff_at_end, - ) - self.logger.info( - "Overriding end_lon_diff (%s) and end_lat_diff (%s) by setting them to 0", - end_lon_diff, - end_lat_diff, - ) - end_lon_diff = 0 - end_lat_diff = 0 - - seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 - seg_minsum += seg_min - - if len(segi) > 10: # noqa: PLR2004 - self.logger.info( - f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 - f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" - f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", - ) - - # Start with zero adjustment at begining and linearly ramp up to the diff at the end - lon_nudge = np.interp( - lon.cf["T"].data[segi].astype(np.int64), - [ - lon.cf["T"].data[segi].astype(np.int64)[0], - lon.cf["T"].data[segi].astype(np.int64)[-1], - ], - [0, end_lon_diff], - ) - lat_nudge = np.interp( - lat.cf["T"].data[segi].astype(np.int64), - [ - lat.cf["T"].data[segi].astype(np.int64)[0], - lat.cf["T"].data[segi].astype(np.int64)[-1], - ], - [0, end_lat_diff], - ) - - # Sanity checks - if ( - np.max(np.abs(lon[segi] + lon_nudge)) > 180 # noqa: PLR2004 - or np.max(np.abs(lat[segi] + lon_nudge)) > 90 # noqa: PLR2004 - ): - self.logger.warning( - "Nudged coordinate is way out of reasonable range - segment %d", - seg_count, - ) - self.logger.warning( - " max(abs(lon)) = %s", - np.max(np.abs(lon[segi] + lon_nudge)), - ) - self.logger.warning( - " max(abs(lat)) = %s", - np.max(np.abs(lat[segi] + lat_nudge)), - ) - - lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) - lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) - dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - seg_count += 1 - - # Any dead reckoned points after first GPS fix - not possible to nudge, just copy in - segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] - seg_min = 0 - if segi.any(): - lon_nudged_array = np.append(lon_nudged_array, lon[segi]) - lat_nudged_array = np.append(lat_nudged_array, lat[segi]) - dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 - - self.logger.info( - f"{seg_count + 1:4d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 - ) - self.segment_count = seg_count - self.segment_minsum = seg_minsum - - self.logger.info("Points in final series = %d", len(dt_nudged)) - - lon_nudged = xr.DataArray( - data=lon_nudged_array, - dims=["time"], - coords={"time": dt_nudged}, - name="longitude", - ) - lat_nudged = xr.DataArray( - data=lat_nudged_array, - dims=["time"], - coords={"time": dt_nudged}, - name="latitude", - ) - if self.args.plot: - fig, axes = plt.subplots(nrows=2, figsize=(18, 6)) - axes[0].plot(lat_nudged.coords["time"].data, lat_nudged, "-") - axes[0].plot(lat.cf["T"].data, lat, "--") - axes[0].plot(lat_fix.cf["T"].data, lat_fix, "*") - axes[0].set_ylabel("Latitude") - axes[0].legend(["Nudged", "Original", "GPS Fixes"]) - axes[1].plot(lon_nudged.coords["time"].data, lon_nudged, "-") - axes[1].plot(lon.cf["T"].data, lon, "--") - axes[1].plot(lon_fix.cf["T"].data, lon_fix, "*") - axes[1].set_ylabel("Longitude") - axes[1].legend(["Nudged", "Original", "GPS Fixes"]) - title = "Corrected nav from _nudge_pos()" - fig.suptitle(title) - axes[0].grid() - axes[1].grid() - self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) - plt.show() - - gps_plot = True - if gps_plot: - try: - ax = plt.axes(projection=ccrs.PlateCarree()) - except NameError: - self.logger.warning("No gps_plot, could not import cartopy") - return lon_nudged, lat_nudged - nudged = LineString(zip(lon_nudged.to_numpy(), lat_nudged.to_numpy(), strict=False)) - original = LineString(zip(lon.to_numpy(), lat.to_numpy(), strict=False)) - ax.add_geometries( - [nudged], - crs=ccrs.PlateCarree(), - edgecolor="red", - facecolor="none", - label="Nudged", - ) - ax.add_geometries( - [original], - crs=ccrs.PlateCarree(), - edgecolor="grey", - facecolor="none", - label="Original", - ) - handle_gps = ax.scatter( - lon_fix.to_numpy(), - lat_fix.to_numpy(), - color="green", - label="GPS Fixes", - ) - bounds = nudged.buffer(0.02).bounds - extent = bounds[0], bounds[2], bounds[1], bounds[3] - ax.set_extent(extent, crs=ccrs.PlateCarree()) - ax.coastlines() - handle_nudged = patches.Rectangle((0, 0), 1, 0.1, facecolor="red") - handle_original = patches.Rectangle((0, 0), 1, 0.1, facecolor="gray") - ax.legend( - [handle_nudged, handle_original, handle_gps], - ["Nudged", "Original", "GPS Fixes"], - ) - ax.gridlines( - crs=ccrs.PlateCarree(), - draw_labels=True, - linewidth=1, - color="gray", - alpha=0.5, - ) - ax.set_title(f"{self.args.auv_name} {self.args.mission}") - self.logger.debug( - "Pausing map plot (doesn't work well in VS Code debugger)." - " Close window to continue.", - ) - plt.show() + # Use the shared function from AUV module + lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( + nav_longitude=lon, + nav_latitude=lat, + gps_longitude=lon_fix, + gps_latitude=lat_fix, + logger=self.logger, + auv_name=self.args.auv_name, + mission=self.args.mission, + max_sec_diff_at_end=max_sec_diff_at_end, + create_plots=True, + ) + + # Store results in instance variables for compatibility + self.segment_count = segment_count + self.segment_minsum = segment_minsum return lon_nudged, lat_nudged diff --git a/src/data/combine.py b/src/data/combine.py index 0a53bf69..2bddec3e 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -25,7 +25,7 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" -import argparse +import argparse # noqa: I001 import logging import os import shlex @@ -39,7 +39,6 @@ from pathlib import Path from socket import gethostname from typing import NamedTuple - import cf_xarray # Needed for the .cf accessor # noqa: F401 import defusedxml.ElementTree as ET # noqa: N817 import matplotlib.pyplot as plt @@ -48,19 +47,11 @@ from scipy.interpolate import interp1d from seawater import eos80 -try: - import cartopy.crs as ccrs # type: ignore # noqa: PGH003 - from shapely.geometry import LineString # type: ignore # noqa: PGH003 -except ModuleNotFoundError: - # cartopy is not installed, will not be able to plot maps - pass - import pandas as pd import pyproj -from AUV import monotonic_increasing_time_indices +from AUV import monotonic_increasing_time_indices, nudge_positions from hs2_proc import compute_backscatter, hs2_calc_bb, hs2_read_cal_file from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ, AUV_NetCDF -from matplotlib import patches from scipy import signal AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay @@ -1669,13 +1660,10 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 }, ) - def _nudge_pos(self, max_sec_diff_at_end=10): # noqa: C901, PLR0912, PLR0915 + def _nudge_pos(self, max_sec_diff_at_end=10): """Apply linear nudges to underwater latitudes and longitudes so that they match the surface gps positions. """ - self.segment_count = None - self.segment_minsum = None - try: lon = self.combined_nc["navigation_longitude"] except KeyError: @@ -1685,279 +1673,22 @@ def _nudge_pos(self, max_sec_diff_at_end=10): # noqa: C901, PLR0912, PLR0915 lon_fix = self.combined_nc["gps_longitude"] lat_fix = self.combined_nc["gps_latitude"] - self.logger.info( - f"{'seg#':5s} {'end_sec_diff':12s} {'end_lon_diff':12s} {'end_lat_diff':12s}" # noqa: G004 - f" {'len(segi)':9s} {'seg_min':>9s} {'u_drift (cm/s)':14s} {'v_drift (cm/s)':14s}" - f" {'start datetime of segment':>29}", - ) - - # Any dead reckoned points before first GPS fix - usually empty - # as GPS fix happens before dive - segi = np.where(lat.cf["T"].data < lat_fix.cf["T"].data[0])[0] - if lon[:][segi].any(): - lon_nudged_array = lon[segi] - lat_nudged_array = lat[segi] - dt_nudged = lon.get_index("navigation_time")[segi] - self.logger.debug( - "Filled _nudged arrays with %d values starting at %s " - "which were before the first GPS fix at %s", - len(segi), - lat.get_index("navigation_time")[0], - lat_fix.get_index("gps_time")[0], - ) - else: - lon_nudged_array = np.array([]) - lat_nudged_array = np.array([]) - dt_nudged = np.array([], dtype="datetime64[ns]") - if segi.any(): - seg_min = ( - lat.get_index("navigation_time")[segi][-1] - - lat.get_index("navigation_time")[segi][0] - ).total_seconds() / 60 - else: - seg_min = 0 - self.logger.info( - f"{' ':5} {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14} {'-':>29}", # noqa: E501, G004 - ) - - seg_count = 0 - seg_minsum = 0 - for i in range(len(lat_fix) - 1): - # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes - segi = np.where( - np.logical_and( - lat.cf["T"].data > lat_fix.cf["T"].data[i], - lat.cf["T"].data < lat_fix.cf["T"].data[i + 1], - ), - )[0] - if not segi.any(): - self.logger.debug( - f"No dead reckoned values found between GPS times of " # noqa: G004 - f"{lat_fix.cf['T'].data[i]} and {lat_fix.cf['T'].data[i + 1]}", - ) - continue - - end_sec_diff = float(lat_fix.cf["T"].data[i + 1] - lat.cf["T"].data[segi[-1]]) / 1.0e9 - - end_lon_diff = float(lon_fix[i + 1]) - float(lon[segi[-1]]) - end_lat_diff = float(lat_fix[i + 1]) - float(lat[segi[-1]]) - - # Compute approximate horizontal drift rate as a sanity check - try: - u_drift = ( - end_lon_diff - * float(np.cos(lat_fix[i + 1] * np.pi / 180)) - * 60 - * 185300 - / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) - ) - except ZeroDivisionError: - u_drift = 0 - try: - v_drift = ( - end_lat_diff - * 60 - * 185300 - / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) - ) - except ZeroDivisionError: - v_drift = 0 - - if abs(end_lon_diff) > 1 or abs(end_lat_diff) > 1: - # It's a problem if we have more than 1 degree difference at the end of the segment. - # This is usually because the GPS fix is bad, but sometimes it's because the - # dead reckoned position is bad. Or sometimes it's both as in dorado 2016.384.00. - # Early QC by calling _range_qc_combined_nc() can remove the bad points. - # Monterey Bay missions that have bad points can be added to the lists in - # _navigation_process() and/or _gps_process(). - self.logger.info( - f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 - f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" - f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", - ) - self.logger.error( - "End of underwater segment dead reckoned position is too different " - "from GPS fix: abs(end_lon_diff) (%s) > 1 or abs(end_lat_diff) (%s) > 1", - end_lon_diff, - end_lat_diff, - ) - self.logger.info( - "Fix this error by calling _range_qc_combined_nc() in " - "_navigation_process() and/or _gps_process() for %s %s", - self.args.auv_name, - self.args.mission, - ) - error_message = ( - f"abs(end_lon_diff) ({end_lon_diff}) > 1 or " - f"abs(end_lat_diff) ({end_lat_diff}) > 1" - ) - raise ValueError(error_message) - if abs(end_sec_diff) > max_sec_diff_at_end: - # Happens in dorado 2016.348.00 because of a bad GPS fixes being removed - self.logger.warning( - "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", - end_sec_diff, - max_sec_diff_at_end, - ) - self.logger.info( - "Overriding end_lon_diff (%s) and end_lat_diff (%s) by setting them to 0", - end_lon_diff, - end_lat_diff, - ) - end_lon_diff = 0 - end_lat_diff = 0 - - seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 - seg_minsum += seg_min - - if len(segi) > 10: # noqa: PLR2004 - self.logger.info( - f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 - f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" - f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", - ) - - # Start with zero adjustment at begining and linearly ramp up to the diff at the end - lon_nudge = np.interp( - lon.cf["T"].data[segi].astype(np.int64), - [ - lon.cf["T"].data[segi].astype(np.int64)[0], - lon.cf["T"].data[segi].astype(np.int64)[-1], - ], - [0, end_lon_diff], - ) - lat_nudge = np.interp( - lat.cf["T"].data[segi].astype(np.int64), - [ - lat.cf["T"].data[segi].astype(np.int64)[0], - lat.cf["T"].data[segi].astype(np.int64)[-1], - ], - [0, end_lat_diff], - ) - - # Sanity checks - if ( - np.max(np.abs(lon[segi] + lon_nudge)) > 180 # noqa: PLR2004 - or np.max(np.abs(lat[segi] + lon_nudge)) > 90 # noqa: PLR2004 - ): - self.logger.warning( - "Nudged coordinate is way out of reasonable range - segment %d", - seg_count, - ) - self.logger.warning( - " max(abs(lon)) = %s", - np.max(np.abs(lon[segi] + lon_nudge)), - ) - self.logger.warning( - " max(abs(lat)) = %s", - np.max(np.abs(lat[segi] + lat_nudge)), - ) - - lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) - lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) - dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - seg_count += 1 - - # Any dead reckoned points after first GPS fix - not possible to nudge, just copy in - segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] - seg_min = 0 - if segi.any(): - lon_nudged_array = np.append(lon_nudged_array, lon[segi]) - lat_nudged_array = np.append(lat_nudged_array, lat[segi]) - dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 - - self.logger.info( - f"{seg_count + 1:4d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 - ) - self.segment_count = seg_count - self.segment_minsum = seg_minsum - - self.logger.info("Points in final series = %d", len(dt_nudged)) - - lon_nudged = xr.DataArray( - data=lon_nudged_array, - dims=["time"], - coords={"time": dt_nudged}, - name="longitude", - ) - lat_nudged = xr.DataArray( - data=lat_nudged_array, - dims=["time"], - coords={"time": dt_nudged}, - name="latitude", - ) - if self.args.plot: - fig, axes = plt.subplots(nrows=2, figsize=(18, 6)) - axes[0].plot(lat_nudged.coords["time"].data, lat_nudged, "-") - axes[0].plot(lat.cf["T"].data, lat, "--") - axes[0].plot(lat_fix.cf["T"].data, lat_fix, "*") - axes[0].set_ylabel("Latitude") - axes[0].legend(["Nudged", "Original", "GPS Fixes"]) - axes[1].plot(lon_nudged.coords["time"].data, lon_nudged, "-") - axes[1].plot(lon.cf["T"].data, lon, "--") - axes[1].plot(lon_fix.cf["T"].data, lon_fix, "*") - axes[1].set_ylabel("Longitude") - axes[1].legend(["Nudged", "Original", "GPS Fixes"]) - title = "Corrected nav from _nudge_pos()" - fig.suptitle(title) - axes[0].grid() - axes[1].grid() - self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) - plt.show() - - gps_plot = True - if gps_plot: - try: - ax = plt.axes(projection=ccrs.PlateCarree()) - except NameError: - self.logger.warning("No gps_plot, could not import cartopy") - return lon_nudged, lat_nudged - nudged = LineString(zip(lon_nudged.to_numpy(), lat_nudged.to_numpy(), strict=False)) - original = LineString(zip(lon.to_numpy(), lat.to_numpy(), strict=False)) - ax.add_geometries( - [nudged], - crs=ccrs.PlateCarree(), - edgecolor="red", - facecolor="none", - label="Nudged", - ) - ax.add_geometries( - [original], - crs=ccrs.PlateCarree(), - edgecolor="grey", - facecolor="none", - label="Original", - ) - handle_gps = ax.scatter( - lon_fix.to_numpy(), - lat_fix.to_numpy(), - color="green", - label="GPS Fixes", - ) - bounds = nudged.buffer(0.02).bounds - extent = bounds[0], bounds[2], bounds[1], bounds[3] - ax.set_extent(extent, crs=ccrs.PlateCarree()) - ax.coastlines() - handle_nudged = patches.Rectangle((0, 0), 1, 0.1, facecolor="red") - handle_original = patches.Rectangle((0, 0), 1, 0.1, facecolor="gray") - ax.legend( - [handle_nudged, handle_original, handle_gps], - ["Nudged", "Original", "GPS Fixes"], - ) - ax.gridlines( - crs=ccrs.PlateCarree(), - draw_labels=True, - linewidth=1, - color="gray", - alpha=0.5, - ) - ax.set_title(f"{self.args.auv_name} {self.args.mission}") - self.logger.debug( - "Pausing map plot (doesn't work well in VS Code debugger)." - " Close window to continue.", - ) - plt.show() + # Use the shared function from AUV module + lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( + nav_longitude=lon, + nav_latitude=lat, + gps_longitude=lon_fix, + gps_latitude=lat_fix, + logger=self.logger, + auv_name=self.args.auv_name, + mission=self.args.mission, + max_sec_diff_at_end=max_sec_diff_at_end, + create_plots=True, + ) + + # Store results in instance variables for compatibility + self.segment_count = segment_count + self.segment_minsum = segment_minsum return lon_nudged, lat_nudged From 7efef8de6ec09fc48053fa8ece7f13e4a0314cb3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 16 Oct 2025 10:40:47 -0700 Subject: [PATCH 014/121] WIP: Initial attempt at a process_lrauv.py module. --- src/data/process_lrauv.py | 43 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100755 src/data/process_lrauv.py diff --git a/src/data/process_lrauv.py b/src/data/process_lrauv.py new file mode 100755 index 00000000..1af00808 --- /dev/null +++ b/src/data/process_lrauv.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +""" +Process LRAUV data from NetCDF4 log files to resampled .nc files. +(This replaces the legacy lrauvNc4ToNetcdf.py script in STOQS.) + +Find LRAUV log files in smb://atlas.shore.mbari.org/LRAUV/missionlogs +and run the data through standard science data processing to calibrated, +aligned, and resampled netCDF files. Use a standard set of processing options; +more flexibility is available via the inndividual processing modules. + +Limit processing to specific steps by providing arguments: + --extract + --combine + --resample + --archive + --cleanup +If none provided then perform all steps. + +Uses command line arguments from nc42netcdfs.py and combine.py. +""" + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" + +from process import Processor + + +class LRAUVProcessor(Processor): + pass + + +if __name__ == "__main__": + VEHICLE = "tethys" + LRAUV_DIR = "/Volumes/LRAUV" + # It's possible that we might need calibration files for some sensors + # in the future, so point to a potential directory where they can be found. + CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" + MOUNT_DIR = "smb://atlas.shore.mbari.org/LRAUV" + START_YEAR = 2012 + + proc = LRAUVProcessor(VEHICLE, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) + proc.process_command_line() + proc.process_missions(START_YEAR) From 1de36bb47a1580e82f415ddaf73a33d43076c1c2 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 11:52:33 -0700 Subject: [PATCH 015/121] Simplify calling methods with just log_file, save using _Group pattern. --- src/data/nc42netcdfs.py | 55 ++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index eda7ae2d..7b958395 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -21,7 +21,7 @@ # Local directory that serves as the work area for log_files and netcdf files BASE_LRAUV_WEB = "https://dods.mbari.org/data/lrauv/" -BASE_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve() +BASE_LRAUV_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve() SUMMARY_SOURCE = "Original LRAUV data extracted from {}, group {}" GROUPS = ["navigation", "ctd", "ecopuck"] # Your actual group names @@ -181,23 +181,34 @@ def get_groups_netcdf4(self, file_path): with netCDF4.Dataset(file_path, "r") as dataset: return list(dataset.groups.keys()) - def extract_groups_to_files_netcdf4(self, input_file, output_dir): - """Extract each group to a separate NetCDF file using netCDF4 library. + def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: + """Extract each group from .nc4 file to a separate .nc file using netCDF4 library. + Args: + log_file: Relative path from BASE_LRAUV_WEB to .nc4 log_file + + Returns: + netcdfs_dir: Local directory where NetCDF files were saved + + Note: The xarray library fails reading the WetLabsBB2FL group from this file: brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 with garbled data for the serial variable (using ncdump): serial = "$F!{<8D>\031@7\024[P]\001\030" ; but netCDF4 can skip over it and read the rest of the variables. """ - output_dir = Path(output_dir) - output_dir.mkdir(exist_ok=True, parents=True) + # Download over http so that we don't need to mount smb shares + url = os.path.join(BASE_LRAUV_WEB, log_file) # noqa: PTH118 + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + netcdfs_dir.mkdir(exist_ok=True, parents=True) - self.logger.info("Extracting data from %s", input_file) + extract.logger.info("Downloading %s", url) + input_file = extract.download_with_pooch(url, netcdfs_dir, self.args.known_hash) + self.logger.info("Extracting data from %s", input_file) with netCDF4.Dataset(input_file, "r") as src_dataset: # Extract root group first - self._extract_root_group(src_dataset, output_dir) + self._extract_root_group(src_dataset, log_file, netcdfs_dir) # Extract all other groups all_groups = list(src_dataset.groups.keys()) @@ -206,10 +217,12 @@ def extract_groups_to_files_netcdf4(self, input_file, output_dir): if group_name != "/" and group_name not in all_groups: self.logger.warning("Group %s not found in %s", group_name, input_file) continue - self._extract_single_group(src_dataset, group_name, output_dir) + self._extract_single_group(src_dataset, group_name, log_file, netcdfs_dir) + + return netcdfs_dir - def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path): - """Extract variables from the root group to Universals.nc.""" + def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, output_dir: Path): + """Extract variables from the root group to _Group_Universals.nc.""" root_parms = SCIENG_PARMS.get("/", []) if not root_parms: return @@ -219,7 +232,7 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path): vars_to_extract = self._get_available_variables(src_dataset, root_parms) if vars_to_extract: - output_file = output_dir / "Universals.nc" + output_file = output_dir / f"{Path(log_file).stem}_Group_Universals.nc" self._create_netcdf_file(src_dataset, vars_to_extract, output_file) self.logger.info("Extracted root group '/' to %s", output_file) else: @@ -229,9 +242,9 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, output_dir: Path): self.logger.warning("Could not extract root group '/': %s", e) def _extract_single_group( - self, src_dataset: netCDF4.Dataset, group_name: str, output_dir: Path + self, src_dataset: netCDF4.Dataset, group_name: str, log_file: str, output_dir: Path ): - """Extract a single group to its own NetCDF file.""" + """Extract a single group to its own NetCDF file named like _Group_.nc.""" group_parms = SCIENG_PARMS[group_name] try: @@ -241,7 +254,7 @@ def _extract_single_group( vars_to_extract = self._get_available_variables(src_group, group_parms) if vars_to_extract: - output_file = output_dir / f"{group_name}.nc" + output_file = output_dir / f"{Path(log_file).stem}_Group_{group_name}.nc" self._create_netcdf_file(src_group, vars_to_extract, output_file) self.logger.info("Extracted %s to %s", group_name, output_file) else: @@ -393,8 +406,11 @@ def process_command_line(self): parser.add_argument( "--base_path", action="store", - default=BASE_PATH, - help="Base directory for missionlogs and missionnetcdfs, default: auv_data", + default=BASE_LRAUV_PATH, + help=( + "Base directory for missionlogs and missionnetcdfs, " + "default: auv_data in repo data directory" + ), ) parser.add_argument( "--title", @@ -488,9 +504,4 @@ def process_command_line(self): extract.show_variable_mapping() sys.exit(0) else: - url = os.path.join(BASE_LRAUV_WEB, extract.args.log_file) # noqa: PTH118 - output_dir = Path(BASE_PATH, Path(extract.args.log_file).parent) - extract.logger.info("Downloading %s", url) - input_file = extract.download_with_pooch(url, output_dir, extract.args.known_hash) - # extract.extract_groups_to_files(input_file, output_dir) - extract.extract_groups_to_files_netcdf4(input_file, output_dir) + extract.extract_groups_to_files_netcdf4(extract.args.log_file) From 2c7575ae75ab115192f74b2e4881b68642a15589 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 11:53:05 -0700 Subject: [PATCH 016/121] Add test for process_lrauv.py --- .vscode/launch.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index 88e8ae2b..804aa7f1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -293,6 +293,15 @@ "console": "integratedTerminal", "args": ["-v", "1", "--noinput", "--no_cleanup", "--download", "--mission", "2011.256.02"] }, + { + "name": "process_lrauv", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/process_lrauv.py", + "console": "integratedTerminal", + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] + }, ] } From 6abcb698b98085241f844e1b2be5a6ce968c0ee0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 11:53:55 -0700 Subject: [PATCH 017/121] Call process_log_files(), signifying that these are LRAUV data. --- src/data/process_lrauv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/process_lrauv.py b/src/data/process_lrauv.py index 1af00808..7a99f92b 100755 --- a/src/data/process_lrauv.py +++ b/src/data/process_lrauv.py @@ -40,4 +40,4 @@ class LRAUVProcessor(Processor): proc = LRAUVProcessor(VEHICLE, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) proc.process_command_line() - proc.process_missions(START_YEAR) + proc.process_log_files() From 9aecc2aa1201dec737ec62d9ac17cbb87e2479d7 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 15:56:57 -0700 Subject: [PATCH 018/121] Implement first and "last" steps in process.py for LRAUV data. This gives a decent foundation for infilling the combine -> align -> resample steps using the existing framework that works for dorado and i2map data. --- .vscode/launch.json | 4 +- src/data/archive.py | 42 ++++++++ src/data/nc42netcdfs.py | 6 +- src/data/process.py | 228 ++++++++++++++++++++++++++++++++++------ 4 files changed, 242 insertions(+), 38 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 804aa7f1..73a3045b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -299,7 +299,9 @@ "request": "launch", "program": "${workspaceFolder}/src/data/process_lrauv.py", "console": "integratedTerminal", - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] }, diff --git a/src/data/archive.py b/src/data/archive.py index a1a3748a..2bf8aa37 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -19,10 +19,12 @@ from create_products import MISSIONIMAGES, MISSIONODVS from logs2netcdfs import BASE_PATH, LOG_FILES, MISSIONNETCDFS, AUV_NetCDF +from nc42netcdfs import BASE_LRAUV_PATH from resample import FREQ LOG_NAME = "processing.log" AUVCTD_VOL = "/Volumes/AUVCTD" +LRAUV_VOL = "/Volumes/LRAUV" class Archiver: @@ -170,6 +172,46 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: def copy_to_M3(self, resampled_nc_file: str) -> None: pass + def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: + "Copy the intermediate and resampled netCDF file(s) to the archive LRAUV location" + src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + dst_dir = Path(LRAUV_VOL, Path(log_file).parent) + try: + Path(dst_dir).stat() + except FileNotFoundError: + self.logger.exception("%s not found", dst_dir) + self.logger.info("Is %s mounted?", self.mount_dir) + sys.exit(1) + for src_file in sorted(src_dir.glob(f"{Path(log_file).stem}_Group_*.nc")): + dst_file = Path(dst_dir, src_file.name) + if self.args.clobber: + if dst_file.exists(): + self.logger.info("Removing %s", dst_file) + dst_file.unlink() + if src_file.exists(): + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s %s done.", src_file, dst_dir) + else: + self.logger.info( + "%-75s exists, but is not being archived because --clobber is not specified.", + src_file.name, + ) + for ftype in (f"{freq}.nc", "cal.nc", "align.nc"): + src_file = Path(src_dir, f"{Path(log_file).stem}_{ftype}") + dst_file = Path(dst_dir, src_file.name) + if self.args.clobber: + if dst_file.exists(): + self.logger.info("Removing %s", dst_file) + dst_file.unlink() + if src_file.exists(): + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s %s done.", src_file, dst_dir) + else: + self.logger.info( + "%-36s exists, but is not being archived because --clobber is not specified.", # noqa: E501 + src_file.name, + ) + def process_command_line(self): parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 7b958395..877a7adf 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -202,8 +202,8 @@ def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) netcdfs_dir.mkdir(exist_ok=True, parents=True) - extract.logger.info("Downloading %s", url) - input_file = extract.download_with_pooch(url, netcdfs_dir, self.args.known_hash) + self.logger.info("Downloading %s", url) + input_file = self.download_with_pooch(url, netcdfs_dir) self.logger.info("Extracting data from %s", input_file) with netCDF4.Dataset(input_file, "r") as src_dataset: @@ -248,7 +248,7 @@ def _extract_single_group( group_parms = SCIENG_PARMS[group_name] try: - self.logger.info(" Group %s", group_name) + self.logger.debug(" Group %s", group_name) src_group = src_dataset.groups[group_name] vars_to_extract = self._get_available_variables(src_group, group_parms) diff --git a/src/data/process.py b/src/data/process.py index f0036d72..4dcedd38 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -1,13 +1,26 @@ #!/usr/bin/env python """ -Base module for data processing. +Base module for data processing for Dorado class and LRAUV class data. Run the data through standard science data processing to calibrated, aligned, and resampled netCDF files. Use a standard set of processing options; more flexibility is available via the inndividual processing modules. +The desire is to reuse as much code as possible between Dorado class +and LRAUV class data processing. The initial steps of creating the _cal.nc +files differ because Dorado class data are raw binary log files that need to be +processed to _nc files, while LRAUV class data are NetCDF4 log files that +already contain much of the necessary information. The initial step for Dorado +class data are: download_process and calibrate, while for LRAUV class data +are: extract and combine. After that, the processing steps are similar with +the data in a local directory organized similarly to their institutional +archives. + +Dorado class data processing: +============================= + Limit processing to specific steps by providing arugments: - --download_process + --download_process (logs2netcdf.py & lopcToNetCDF.py) --calibrate --align --resample @@ -18,6 +31,21 @@ If none provided then perform all steps. Uses command line arguments from logs2netcdf.py and calibrate.py. + + +LRAUV class data processing: +============================ + +Limit processing to specific steps by providing arugments: + --extract (nc42netcdfs.py) + --combine + --align + --resample + --archive + --create_products + --email_to + --cleanup +If none provided then perform all steps. """ __author__ = "Mike McCann" @@ -45,6 +73,7 @@ from emailer import NOTIFICATION_EMAIL, Emailer from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, AUV_NetCDF from lopcToNetCDF import LOPC_Processor, UnexpectedAreaOfCode +from nc42netcdfs import BASE_LRAUV_PATH, BASE_LRAUV_WEB, Extract from resample import ( AUVCTD_OPENDAP_BASE, FLASH_THRESHOLD, @@ -67,6 +96,29 @@ class FailedMission(Exception): pass +def log_file_processor(func): + """Decorator to handle LRAUV log_file processing exceptions and cleanup.""" + + def wrapper(self, log_file: str): + t_start = time.time() + try: + return func(self, log_file) + except (TestMission, FailedMission) as e: + self.logger.info(str(e)) + finally: + if hasattr(self, "log_handler"): + # Cleanup and archiving logic + self.archive(mission=None, log_file=log_file) + if not self.args.no_cleanup: + self.cleanup(log_file=log_file) + self.logger.info( + "log_file %s took %.1f seconds to process", log_file, time.time() - t_start + ) + self.logger.removeHandler(self.log_handler) + + return wrapper + + class Processor: """ Base class for data processing. Run the data through standard science data @@ -320,10 +372,20 @@ def resample(self, mission: str) -> None: finally: resamp.logger.removeHandler(self.log_handler) - def archive(self, mission: str, add_logger_handlers: bool = True) -> None: # noqa: FBT001, FBT002 + def archive( + self, + mission: str = None, + log_file: Path = None, + add_logger_handlers: bool = True, # noqa: FBT001, FBT002 + ) -> None: + """Archiving steps for mission or log_file. + + If mission is provided, archive the processed data for Dorado class vehicles. + If log_file is provided, archive the processed data for LRAUV class vehicles.""" arch = Archiver(add_logger_handlers) arch.args = argparse.Namespace() arch.args.auv_name = self.vehicle + arch.mount_dir = self.mount_dir arch.args.mission = mission arch.commandline = self.commandline arch.args.create_products = self.args.create_products @@ -334,25 +396,33 @@ def archive(self, mission: str, add_logger_handlers: bool = True) -> None: # no arch.args.verbose = self.args.verbose arch.logger.setLevel(self._log_levels[self.args.verbose]) if add_logger_handlers: - self.logger.info("Archiving steps for %s", mission) arch.logger.addHandler(self.log_handler) - file_name_base = f"{arch.args.auv_name}_{arch.args.mission}" - nc_file_base = Path( - BASE_PATH, - arch.args.auv_name, - MISSIONNETCDFS, - arch.args.mission, - file_name_base, - ) - self.logger.info("nc_file_base = %s, BASE_PATH = %s", nc_file_base, BASE_PATH) - if str(BASE_PATH).startswith(("/home/runner/", "/root")): - arch.logger.info( - "Not archiving %s %s to AUVCTD as it's likely CI testing", + if mission: + # Dorado class vehicle archiving + self.logger.info("Archiving steps for %s", mission) + file_name_base = f"{arch.args.auv_name}_{arch.args.mission}" + nc_file_base = Path( + BASE_PATH, arch.args.auv_name, + MISSIONNETCDFS, arch.args.mission, + file_name_base, ) + self.logger.info("nc_file_base = %s, BASE_PATH = %s", nc_file_base, BASE_PATH) + if str(BASE_PATH).startswith(("/home/runner/", "/root")): + arch.logger.info( + "Not archiving %s %s to AUVCTD as it's likely CI testing", + arch.args.auv_name, + arch.args.mission, + ) + else: + arch.copy_to_AUVTCD(nc_file_base, self.args.freq) + elif log_file: + # LRAUV class vehicle archiving + self.logger.info("Archiving steps for %s", log_file) + arch.copy_to_LRAUV(log_file, freq=self.args.freq) else: - arch.copy_to_AUVTCD(nc_file_base, self.args.freq) + arch.logger.error("Either mission or log_file must be provided for archiving.") arch.logger.removeHandler(self.log_handler) def create_products(self, mission: str) -> None: @@ -385,23 +455,59 @@ def email(self, mission: str) -> None: email.logger.setLevel(self._log_levels[self.args.verbose]) email.logger.addHandler(self.log_handler) - def cleanup(self, mission: str) -> None: - self.logger.info( - "Removing %s files from %s and %s", - mission, - MISSIONNETCDFS, - MISSIONLOGS, - ) - try: - shutil.rmtree( - Path(self.args.base_path, self.vehicle, MISSIONLOGS, mission), - ) - shutil.rmtree( - Path(self.args.base_path, self.vehicle, MISSIONNETCDFS, mission), + def _remove_empty_parents(self, path: Path, stop_at: Path) -> None: + """Remove empty parent directories up to stop_at path.""" + parent = path.parent + while parent != stop_at: + try: + ds_store = parent / ".DS_Store" + if ds_store.exists(): + ds_store.unlink() # Remove .DS_Store file so that the directory is empty + if parent.exists() and not any(parent.iterdir()): + self.logger.debug("Removing empty directory: %s", parent) + parent.rmdir() + parent = parent.parent + else: + break + except OSError as e: + self.logger.debug("Could not remove directory %s: %s", parent, e) + break + + def cleanup(self, mission: str = None, log_file: str = None) -> None: + if mission: + self.logger.info( + "Removing mission %s files from %s and %s", + mission, + MISSIONNETCDFS, + MISSIONLOGS, ) - self.logger.info("Done removing %s work files", mission) - except FileNotFoundError as e: - self.logger.info("File not found: %s", e) + try: + shutil.rmtree( + Path(self.args.base_path, self.vehicle, MISSIONLOGS, mission), + ) + shutil.rmtree( + Path(self.args.base_path, self.vehicle, MISSIONNETCDFS, mission), + ) + self.logger.info("Done removing %s work files", mission) + except FileNotFoundError as e: + self.logger.info("File not found: %s", e) + elif log_file: + self.logger.info("Removing work files from local directory for %s", log_file) + try: + log_path = Path(BASE_LRAUV_PATH, log_file).resolve() + for item in log_path.parent.iterdir(): + if item.is_file(): + self.logger.debug("Removing file %s", item) + item.unlink() + elif item.is_dir(): + self.logger.debug("Removing directory %s", item) + shutil.rmtree(item) + self._remove_empty_parents(log_path, Path(BASE_LRAUV_PATH)) + self.logger.info("Done removing work files for %s", log_file) + except FileNotFoundError as e: + self.logger.info("File not found: %s", e) + else: + self.logger.error("Either mission or log_file must be provided for cleanup.") def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C901, PLR0912, PLR0915 netcdfs_dir = Path( @@ -621,6 +727,55 @@ def process_missions(self, start_year: int) -> None: src_dir=self.get_mission_dir(mission), ) + # ====================== LRAUV data specific processing ====================== + # The command line arument --log_file distinguishes LRAUV data from Dorado data. + # Dorado class data uses --mission instead. Also, start and end specifications + # are different for LRAUV data: --start and --end instead of --start_year, + # --start_yd, --end_year, and --end_yd. If --start and --end are spcified then + # --auv_name is required to look up the individual log files to process. + + def extract(self, log_file: str) -> None: + self.logger.info("Extracting log file: %s", log_file) + extract = Extract() + extract.args = argparse.Namespace() + extract.args.verbose = self.args.verbose + extract.logger.setLevel(self._log_levels[self.args.verbose]) + extract.logger.addHandler(self.log_handler) + + url = os.path.join(BASE_LRAUV_WEB, log_file) # noqa: PTH118 + output_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + extract.logger.info("Downloading %s", url) + input_file = extract.download_with_pooch(url, output_dir) + return extract.extract_groups_to_files_netcdf4(input_file) + + @log_file_processor + def process_log_file(self, log_file: str) -> None: + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + Path(netcdfs_dir).mkdir(parents=True, exist_ok=True) + self.log_handler = logging.FileHandler( + Path(BASE_LRAUV_PATH, f"{log_file}_extract.log"), mode="w+" + ) + self.log_handler.setLevel(self._log_levels[self.args.verbose]) + self.log_handler.setFormatter(AUV_NetCDF._formatter) + self.logger.info( + "=====================================================================================================================", + ) + self.logger.addHandler(self.log_handler) + self.logger.info("commandline = %s", self.commandline) + + netcdfs_dir = self.extract(log_file) + # self.align(log_file) + # self.resample(log_file) + # self.create_products(log_file) + self.logger.info("Finished processing log file: %s", log_file) + + def process_log_files(self) -> None: + if self.args.log_file: + # log_file is string like: + # brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 + self.vehicle = self.args.log_file.split("/")[0].lower() + self.process_log_file(self.args.log_file) + def process_command_line(self): parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, @@ -741,7 +896,12 @@ def process_command_line(self): parser.add_argument( "--mission", action="store", - help="Process only this mission", + help="For Doado class data - process only this mission", + ) + parser.add_argument( + "--log_file", + action="store", + help="For LRAUV class data - process only this log file", ) parser.add_argument( "--freq", From d805f9731ca6f655bf9aed475c46cd07369b580b Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 16:05:05 -0700 Subject: [PATCH 019/121] Update EXPECTED_SIZE_GITHUB values. --- src/data/test_process_dorado.py | 2 +- src/data/test_process_i2map.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 3eb1033c..bcb41ac1 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -31,7 +31,7 @@ def test_process_dorado(complete_dorado_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 621298 + EXPECTED_SIZE_GITHUB = 621286 EXPECTED_SIZE_ACT = 621298 EXPECTED_SIZE_LOCAL = 621286 if str(proc.args.base_path).startswith("/home/runner"): diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index cbd2c2c3..e2f6cb05 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 58839 + EXPECTED_SIZE_GITHUB = 58832 EXPECTED_SIZE_ACT = 58816 EXPECTED_SIZE_LOCAL = 58884 if str(proc.args.base_path).startswith("/home/runner"): From 886728ef777c74f076a9973cdff18de39901ed48 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 21 Oct 2025 16:08:19 -0700 Subject: [PATCH 020/121] Update EXPECTED_MD5_GITHUB value. --- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index bcb41ac1..90ec047b 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "6550bb8ed5919f21413f30dfffdcf116" + EXPECTED_MD5_GITHUB = "9f3f9e2e5abed08692ddb233dec0d0ac" EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" EXPECTED_MD5_LOCAL = "6ecb2229b00835055619e982fe9d5023" if str(proc.args.base_path).startswith("/home/runner"): From d97165a8be8894a0d1aabbee4d5f9d5b5ac44c3e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 22 Oct 2025 10:23:10 -0700 Subject: [PATCH 021/121] Utility script for seafloor mapping in Monterey Bay in lieu of using XBTs. --- src/data/m1_soundspeed.py | 132 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100755 src/data/m1_soundspeed.py diff --git a/src/data/m1_soundspeed.py b/src/data/m1_soundspeed.py new file mode 100755 index 00000000..43737d6d --- /dev/null +++ b/src/data/m1_soundspeed.py @@ -0,0 +1,132 @@ +#! /usr/bin/env python +""" +Read most recent profile of temperature and practical salinity from the MBARI M1 +mooring in Monterey Bay and return a profile of sound speed as a function of +depth. + +This uses the opendap URL produced on an hourly basis as part of MBARI's SSDS +realtime data system. + +Using Ferret to access the data: +================================ +The most recent profile is retrieved using the SET REGION/L=2156:2156 statement +where the number 2156 is seen as the last index for the L axis (TIME) seen in +the output of the SHOW DATA/VAR statement. Below is a terminal session showing +how to access the data: + +[ssdsadmin@elvis ~]$ ferret + NOAA/PMEL TMAP + FERRET v7.43 (optimized) + Linux 3.10.0-862.11.6.el7.x86_64 64-bit - 09/14/18 + 22-Oct-25 09:20 + +yes? USE "http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc" +yes? SHOW DATA/VAR + currently SET data sets: + 1> http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc (default) + Hourly Gridded MBARI Mooring M1 Sea Water Temperature and Salinity Observations + name title I J K L + PSAL Hourly sea_water_salinity 1:1 1:1 1:11 1:2156 + 1 on grid GEN1 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) Y=36.3N:37.3N Z=0:325 + PSAL_QC quality flag 1:1 1:1 1:11 1:2156 + on grid GEN1 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) Y=36.3N:37.3N Z=0:325 + TEMP Hourly sea_water_temperature 1:1 1:1 1:11 1:2156 + celsius on grid GEN1 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) Y=36.3N:37.3N Z=0:325 + TEMP_QC quality flag 1:1 1:1 1:11 1:2156 + on grid GEN1 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) Y=36.3N:37.3N Z=0:325 + TIME_QC Quality flag for time axis, 1: ... ... ... 1:2156 + flag on grid GEN2 with -1.E+34 for missing data + + POSITION_QC + Quality flag for Latitude and L 1:1 ... ... ... + on grid GEN3 with -1.E+34 for missing data + X=122.5W(-122.5):121.5W(-121.5) + DEPTH_QC Quality flag for depth axis, 1: ... ... 1:11 ... + on grid GEN4 with -1.E+34 for missing data + Z=0:325 + + time range: 24-JUL-2025 18:30 to 22-OCT-2025 13:30 + +yes? SET REGION/L=2156:2156 +yes? LIST TEMP, PSAL + DATA SET: http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc + Hourly Gridded MBARI Mooring M1 Sea Water Temperature and Salinity Observations + DEPTH (m): 0 to 325 + LONGITUDE: 122W(-122) + LATITUDE: 36.8N + TIME: 22-OCT-2025 13:30 + Column 1: TEMP is Hourly sea_water_temperature (celsius) + Column 2: PSAL is Hourly sea_water_salinity (1) + TEMP PSAL +1 / 1: 16.38 33.32 +10 / 2: 16.39 33.32 +20 / 3: 15.43 33.28 +40 / 4: 13.30 33.42 +60 / 5: 11.95 33.51 +80 / 6: 11.33 33.61 +100 / 7: 11.01 33.63 +150 / 8: 10.09 33.81 +200 / 9: 9.67 33.93 +250 / 10: 9.12 34.08 +300 / 11: 7.92 34.09 +yes? quit + + +Using Python to access the data: +================================ +The Xarray library and variety of Python packages provides similar ease-of-use +capability in more modern computational environments. This module provides that +implementation. There are two dependencies that need to be installed via pip or +some other package manager: + gsw + xarray + +e.g. pip install gsw xarray + +__author__ = "Mike McCann" +__copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" +""" # noqa: E501 + +import gsw +import xarray as xr + +# Source for realtime M1 mooring data +url = ( + "http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc" +) +ds = xr.open_dataset(url) + +# Select the most recent profile by indexing the TIME dimension +latest = ds.isel(TIME=-1) +temp = latest["TEMP"].to_numpy.flatten() +psal = latest["PSAL"].to_numpy.flatten() +depth = latest["DEPTH"].to_numpy.flatten() + +# Convert practical salinity to absolute salinity using lat and lon of M1 +# mooring from the index data in the dataset +lon = ds["LONGITUDE"].to_numpy.item() +lat = ds["LATITUDE"].to_numpy.item() +abs_sal = gsw.SA_from_SP(psal, depth, lon, lat) + +# Print out a header showing time, lat, lon and data source similar to Ferret output +time_str = str(latest["TIME"].to_numpy) +time_str = time_str.split(".")[0] + " UTC" # Remove fractional seconds +print("Most recent sound speed profile from M1 mooring") # noqa: T201 +print("===============================================") # noqa: T201 +print(f"Data source: {url}") # noqa: T201 +print(f"Title: {ds.title}") # noqa: T201 +print(f"Latitude: {lat:.2f}") # noqa: T201 +print(f"Longitude: {lon:.2f}") # noqa: T201 +print(f"Time: {time_str}") # noqa: T201 +print() # noqa: T201 + +# Calculate sound speed using the Gibbs Seawater (GSW) Oceanographic Toolbox +# Print out the profile of sound speed as a table +soundspeed = gsw.sound_speed(abs_sal, temp, depth) +print(f"{'Depth (m)':>10} {'Sound Speed (m/s)':>20}") # noqa: T201 +for d, c in zip(depth, soundspeed, strict=True): + print(f"{d:10.2f} {c:20.2f}") # noqa: T201 From 7f85fd17890b8c778f00131174021c109fa57e46 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 22 Oct 2025 10:53:15 -0700 Subject: [PATCH 022/121] Fixes for ruff and add installation instructions. --- src/data/m1_soundspeed.py | 56 ++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/src/data/m1_soundspeed.py b/src/data/m1_soundspeed.py index 43737d6d..6a5d8f39 100755 --- a/src/data/m1_soundspeed.py +++ b/src/data/m1_soundspeed.py @@ -85,8 +85,45 @@ gsw xarray -e.g. pip install gsw xarray - +Installation: +------------- +1. Create a directory to hold this script and a virtual environment: + mkdir m1_soundspeed + cd m1_soundspeed +2. Create a virtual environment (optional but recommended): + python3 -m venv venv +3. Activate the virtual environment: + source venv/bin/activate +4. Install the required packages: + pip install gsw xarray +5. Save this script as m1_soundspeed.py +6. Run the script: + python m1_soundspeed.py + +Sample Output: +============== +python m1_soundspeed.py + +Most recent sound speed profile from M1 mooring +----------------------------------------------- +Data source: http://dods.mbari.org/opendap/data/ssdsdata/deployments/m1/202507/OS_MBARI-M1_20250724_R_TS.nc +Title: Hourly Gridded MBARI Mooring M1 Sea Water Temperature and Salinity Observations +Latitude: 36.75 +Longitude: -122.03 +Time: 2025-10-22T14:30:00 UTC + + Depth (m) Sound Speed (m/s) + 1.00 1508.89 + 10.00 1508.96 + 20.00 1505.97 + 40.00 1501.15 + 60.00 1496.70 + 80.00 1494.31 + 100.00 1493.70 + 150.00 1490.97 + 200.00 1490.73 + 250.00 1489.76 + 300.00 1486.44 __author__ = "Mike McCann" __copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" """ # noqa: E501 @@ -102,21 +139,22 @@ # Select the most recent profile by indexing the TIME dimension latest = ds.isel(TIME=-1) -temp = latest["TEMP"].to_numpy.flatten() -psal = latest["PSAL"].to_numpy.flatten() -depth = latest["DEPTH"].to_numpy.flatten() +temp = latest["TEMP"].to_numpy().flatten() +psal = latest["PSAL"].to_numpy().flatten() +depth = latest["DEPTH"].to_numpy().flatten() # Convert practical salinity to absolute salinity using lat and lon of M1 # mooring from the index data in the dataset -lon = ds["LONGITUDE"].to_numpy.item() -lat = ds["LATITUDE"].to_numpy.item() +lon = ds["LONGITUDE"].to_numpy().item() +lat = ds["LATITUDE"].to_numpy().item() abs_sal = gsw.SA_from_SP(psal, depth, lon, lat) # Print out a header showing time, lat, lon and data source similar to Ferret output -time_str = str(latest["TIME"].to_numpy) +time_str = str(latest["TIME"].to_numpy()) time_str = time_str.split(".")[0] + " UTC" # Remove fractional seconds +print() # noqa: T201 print("Most recent sound speed profile from M1 mooring") # noqa: T201 -print("===============================================") # noqa: T201 +print("-----------------------------------------------") # noqa: T201 print(f"Data source: {url}") # noqa: T201 print(f"Title: {ds.title}") # noqa: T201 print(f"Latitude: {lat:.2f}") # noqa: T201 From 80450c7f7473a5ea7b22140ce095576c4532e048 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 22 Oct 2025 12:14:43 -0700 Subject: [PATCH 023/121] Add gsw as we need to migrate from seawater. --- pyproject.toml | 1 + uv.lock | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9a21f413..f6684c65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "datashader>=0.18.1", "defusedxml>=0.7.1", "gitpython>=3.1.44", + "gsw>=3.6.20", "hvplot>=0.11.3", "ipympl>=0.9.7", "jupyter>=1.1.1", diff --git a/uv.lock b/uv.lock index e3c41136..82bbff19 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" [[package]] @@ -175,6 +175,7 @@ dependencies = [ { name = "datashader" }, { name = "defusedxml" }, { name = "gitpython" }, + { name = "gsw" }, { name = "hvplot" }, { name = "ipympl" }, { name = "jupyter" }, @@ -206,6 +207,7 @@ requires-dist = [ { name = "datashader", specifier = ">=0.18.1" }, { name = "defusedxml", specifier = ">=0.7.1" }, { name = "gitpython", specifier = ">=3.1.44" }, + { name = "gsw", specifier = ">=3.6.20" }, { name = "hvplot", specifier = ">=0.11.3" }, { name = "ipympl", specifier = ">=0.9.7" }, { name = "jupyter", specifier = ">=1.1.1" }, @@ -624,6 +626,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599, upload-time = "2025-01-02T07:32:40.731Z" }, ] +[[package]] +name = "gsw" +version = "3.6.20" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/39/edd76e26b0c8b8a6bcee0107cbcee5219673bb59f274b757de9f989a0fb1/gsw-3.6.20.tar.gz", hash = "sha256:e528cd6563fdc09b244387bfebf131b01199c20ac248f4e5b4eaf00cded1abe6", size = 2702713, upload-time = "2025-08-04T18:04:14.669Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d9/18382b8fe6e8736bad967dd4ed8ab2c2deabbb9f6121d9e41265e7317f24/gsw-3.6.20-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9656dcb42ddeee8134f2bb6d7394928b0b8629634c9e223f9cce7a3c7309597c", size = 2222002, upload-time = "2025-08-04T18:03:34.123Z" }, + { url = "https://files.pythonhosted.org/packages/9f/85/3a9ba4372ac4291e38e887ed8dac44c0385d4b72ee967a7858c4c7a48d96/gsw-3.6.20-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:857a1f0804980186514a0690e0f7dbdffd15a17059649771f3d3a84771e8fb8f", size = 2261350, upload-time = "2025-08-04T18:03:35.481Z" }, + { url = "https://files.pythonhosted.org/packages/dc/36/c3d845de2e453a01f6b1cb099c63ab63c581814d638890c143d064a33a8d/gsw-3.6.20-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b5a143b2993ac150c5b3cb7edf942d1376a20abbc57cc3d8ec4a5a430632890", size = 2400962, upload-time = "2025-08-04T18:03:37.194Z" }, + { url = "https://files.pythonhosted.org/packages/8f/f1/5b6999c89b3ea20cd9ac1169e0cd7c820a881ca97d6b34c7899da28a3d17/gsw-3.6.20-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:33ca2560378d1719fa49dcd380ce0c4a261b01cbd2aa865a3c6c99bfb90b5853", size = 2443576, upload-time = "2025-08-04T18:03:38.782Z" }, + { url = "https://files.pythonhosted.org/packages/13/ed/419237d32a704e4b4bbfcdec8129fbb381ccdf2e33a2cc7d1153c1a1eaa0/gsw-3.6.20-cp312-cp312-win_amd64.whl", hash = "sha256:719d1983bd97991e4e44c1c725322269fc7019c29abc7a641e6a676f1a54f54e", size = 2180514, upload-time = "2025-08-04T18:03:40.217Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1387,6 +1405,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/1a/32b7427aaf62fed3d4e4456f874b25ce39373dbddf6cfde9edbcfc2417fc/netCDF4-1.7.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb95b11804fe051897d1f2044b05d82a1847bc2549631cdd2f655dde7de77a9c", size = 9377415, upload-time = "2024-10-22T19:00:54.412Z" }, { url = "https://files.pythonhosted.org/packages/fd/bf/5e671495c8bdf6b628e091aa8980793579474a10e51bc6ba302a3af6a778/netCDF4-1.7.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9d8a848373723f41ef662590b4f5e1832227501c9fd4513e8ad8da58c269977", size = 9260579, upload-time = "2024-10-22T19:00:56.594Z" }, { url = "https://files.pythonhosted.org/packages/d4/57/0a0bcdebcfaf72e96e7bcaa512f80ee096bf71945a3318d38253338e9c25/netCDF4-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:568ea369e00b581302d77fc5fd0b8f78e520c7e08d0b5af5219ba51f3f1cd694", size = 6991523, upload-time = "2024-10-22T19:00:58.97Z" }, + { url = "https://files.pythonhosted.org/packages/84/0a/182bb4fe5639699ba39d558b553b8e6f04fbfea6cf78404c0f21ef149bf7/netcdf4-1.7.2-cp311-abi3-macosx_13_0_x86_64.whl", hash = "sha256:7e81c3c47f2772eab0b93fba8bb05b17b58dce17720e1bed25e9d76551deecd0", size = 2751391, upload-time = "2025-10-13T18:32:22.749Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1f/54ac27c791360f7452ca27ed1cb2917946bbe1ea4337c590a5abcef6332d/netcdf4-1.7.2-cp311-abi3-macosx_14_0_arm64.whl", hash = "sha256:cb2791dba37fc98fd1ac4e236c97822909f54efbcdf7f1415c9777810e0a28f4", size = 2387513, upload-time = "2025-10-13T18:32:27.499Z" }, + { url = "https://files.pythonhosted.org/packages/5c/5e/9bf3008a9e45c08f4c9fedce4d6f722ef5d970f56a9c5eb375a200dd2b66/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bf11480f6b8a5b246818ffff6b4d90481e51f8b9555b41af0c372eb0aaf8b65f", size = 9621674, upload-time = "2025-10-13T18:32:29.193Z" }, + { url = "https://files.pythonhosted.org/packages/a1/75/46871e85f2bbfb1efe229623d25d7c9daa17e2e968d5235572b2c8bb53e8/netcdf4-1.7.2-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ccc05328a8ff31921b539821791aeb20b054879f3fdf6d1d505bf6422824fec", size = 9453759, upload-time = "2025-10-13T18:32:31.136Z" }, + { url = "https://files.pythonhosted.org/packages/cd/10/c52f12297965938d9b9be666ea1f9d8340c2aea31d6909d90aa650847248/netcdf4-1.7.2-cp311-abi3-win_amd64.whl", hash = "sha256:999bfc4acebf400ed724d5e7329e2e768accc7ee1fa1d82d505da782f730301b", size = 7148514, upload-time = "2025-10-13T18:32:33.121Z" }, ] [[package]] From dbbff83db0f792d73ace8a8c453029276624b95e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 22 Oct 2025 14:23:15 -0700 Subject: [PATCH 024/121] Remove obvious methods that dealt with Dorado log and sensor files. --- src/data/combine.py | 2364 ++----------------------------------------- 1 file changed, 60 insertions(+), 2304 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 2bddec3e..bfa0e1bf 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -6,11 +6,16 @@ Read original data from netCDF files created by nc42netcdfs.py and write out a single netCDF file with the important variables at original sampling intervals. Geometric alignment and any plumbing lag corrections are also done during this -step. The file will contain combined variables (the combined_nc member variable) -and be analogous to the original netCDF4. Rather than using groups in netCDF-4 -the data will be written in classic netCDF-CF with a naming convention that is -similar to Dorado data, with group names (without underscores) preceeding the -variable name with an underscore: +step. This script is similar to calibrate.py that is used for Dorado and i2map +data, but does not apply any sensor calibrations as those are done on the LRAUV +vehicles before the data is logged and unserialized to NetCDF-4 files. The QC +methods implemented in calibrate.py will be reused here. + +The file will contain combined variables (the combined_nc member variable) and +be analogous to the original NetCDF-4. Rather than using groups in NetCDF-4 the +data will be written in classic NetCDF-CF with a naming convention that is +similar to Dorado data, with group names (any underscores removed) preceeding +the variable name with an underscore - all lower case characters: ``` _ _<..........> @@ -20,6 +25,9 @@ _latitude _longitude ``` +The file will be named with a "_cal.nc" suffix to be consistent with the Dorado +and i2map files, indicating the stage of processing. + """ __author__ = "Mike McCann" @@ -27,32 +35,24 @@ import argparse # noqa: I001 import logging -import os -import shlex import shutil -import subprocess import sys import time from argparse import RawTextHelpFormatter -from collections import OrderedDict from datetime import UTC, datetime from pathlib import Path from socket import gethostname from typing import NamedTuple import cf_xarray # Needed for the .cf accessor # noqa: F401 -import defusedxml.ElementTree as ET # noqa: N817 import matplotlib.pyplot as plt import numpy as np import xarray as xr from scipy.interpolate import interp1d -from seawater import eos80 import pandas as pd import pyproj from AUV import monotonic_increasing_time_indices, nudge_positions -from hs2_proc import compute_backscatter, hs2_calc_bb, hs2_read_cal_file from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ, AUV_NetCDF -from scipy import signal AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay @@ -146,462 +146,7 @@ def align_geom(sensor_offset, pitches): return offsets -class Coeffs: - pass - - -# History of seabird25p.cfg file changes: - -# [mccann@elvis i2MAP]$ pwd -# /mbari/M3/master/i2MAP -# [mccann@elvis i2MAP]$ ls -l */*/*/*/seabird25p.cfg -# -rwx------. 1 519 games 3050 Sep 20 2016 2017/01/20170117/2017.017.00/seabird25p.cfg -# -rwx------. 1 519 games 3050 Sep 20 2016 2017/01/20170117/2017.017.01/seabird25p.cfg -# -rwx------. 1 lonny nobody 3050 Sep 20 2016 2017/04/20170407/2017.097.00/seabird25p.cfg -# -rwx------. 1 robs games 3050 Sep 20 2016 2017/05/20170508/2017.128.00/seabird25p.cfg -# -rwx------. 1 robs games 3109 May 11 2017 2017/05/20170512/2017.132.00/seabird25p.cfg -# -rwx------. 1 robs games 3109 May 11 2017 2017/06/20170622/2017.173.00/seabird25p.cfg -# -rwx------. 1 519 games 3109 May 11 2017 2017/08/20170824/2017.236.00/seabird25p.cfg -# -rwx------. 1 519 games 3109 May 11 2017 2017/09/20170914/2017.257.00/seabird25p.cfg -# -rwx------. 1 etrauschke games 3109 Jan 29 2018 2018/01/20180125/2018.025.00/seabird25p.cfg -# -rwx------. 1 henthorn games 3109 Feb 15 2018 2018/02/20180214/2018.045.03/seabird25p.cfg -# -rwx------. 1 lonny games 3667 Mar 2 2018 2018/03/20180306/2018.065.02/seabird25p.cfg -# -rwx------. 1 lonny games 3667 Mar 2 2018 2018/04/20180404/2018.094.00/seabird25p.cfg -# -rwx------. 1 lonny games 3667 Mar 2 2018 2018/06/20180618/2018.169.01/seabird25p.cfg -# -rwx------. 1 lonny games 3667 Jul 19 2018 2018/07/20180718/2018.199.00/seabird25p.cfg -# -rwx------. 1 jana games 3667 Aug 30 2018 2018/08/20180829/2018.241.01/seabird25p.cfg -# -rwx------. 1 lonny games 3667 Oct 25 2018 2018/10/20181023/2018.296.00/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181203/2018.337.00/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.01/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.05/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.06/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.07/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.08/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.09/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.10/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.11/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.12/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181210/2018.344.13/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181214/2018.348.00/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181214/2018.348.01/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181214/2018.348.02/seabird25p.cfg -# -rwx------. 1 jana games 3667 Mar 2 2018 2018/12/20181214/2018.348.03/seabird25p.cfg -# -rwx------. 1 lonny games 3667 Mar 2 2018 2019/01/20190107/2019.007.07/seabird25p.cfg -# -rwx------. 1 lonny games 3667 Mar 2 2018 2019/01/20190107/2019.007.09/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190204/2019.035.10/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.00/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.01/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.02/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.03/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.04/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.05/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.06/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.07/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190226/2019.057.08/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/02/20190228/2019.059.01/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/04/20190408/2019.098.01/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/06/20190606/2019.157.00/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/06/20190606/2019.157.01/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/06/20190606/2019.157.02/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/07/20190709/2019.190.00/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/09/20190916/2019.259.01/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/10/20191007/2019.280.02/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/10/20191021/2019.294.00/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/11/20191107/2019.311.00/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2019/12/20191210/2019.344.06/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/01/20200108/2020.008.00/seabird25p.cfg -# -rwx------. 1 mbassett nobody 3667 Mar 2 2018 2020/02/20200210/2020.041.02/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/02/20200224/2020.055.01/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/06/20200629/2020.181.02/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/07/20200728/2020.210.03/seabird25p.cfg -# -rwx------. 1 lonny nobody 3667 Mar 2 2018 2020/08/20200811/2020.224.04/seabird25p.cfg -# -rwx------. 1 lonny nobody 3899 Sep 11 2020 2020/09/20200914/2020.258.01/seabird25p.cfg -# -rwx------. 1 lonny nobody 3919 Sep 21 2020 2020/09/20200922/2020.266.01/seabird25p.cfg -# -rwxr-xr-x. 1 brian games 4267 Mar 1 2021 2021/03/20210303/2021.062.01/seabird25p.cfg -# -rwxr-xr-x. 1 robs games 4267 Mar 1 2021 2021/03/20210330/2021.089.00/seabird25p.cfg -# -rwxr-xr-x. 1 robs games 4267 Mar 1 2021 2021/05/20210512/2021.132.01/seabird25p.cfg -# -rwxr-xr-x. 1 robs games 4267 Mar 1 2021 2021/06/20210624/2021.175.03/seabird25p.cfg -# -rwx------. 1 lonny nobody 4267 Mar 1 2021 2021/09/20210921/2021.264.03/seabird25p.cfg -# -rwx------. 1 lonny nobody 4267 Mar 1 2021 2021/10/20211018/2021.291.00/seabird25p.cfg -# -rwx------. 1 lonny nobody 4267 Mar 1 2021 2021/11/20211103/2021.307.02/seabird25p.cfg -# -rwx------. 1 lonny nobody 4267 Mar 1 2021 2022/03/20220302/2022.061.01/seabird25p.cfg - - -def _calibrated_temp_from_frequency(cf, nc): - # From processCTD.m: - # TC = 1./(t_a + t_b*(log(t_f0./temp_frequency)) + t_c*((log(t_f0./temp_frequency)).^2) + t_d*((log(t_f0./temp_frequency)).^3)) - 273.15; # noqa: E501 - # From Seabird25p.cc: - # if (*_t_coefs == 'A') { - # f = ::log(T_F0/f); - # T = 1/(T_A + (T_B + (T_C + T_D*f)*f)*f) - 273.15; - # } - # else if (*_t_coefs == 'G') { - # f = ::log(T_GF0/f); - # T = 1/(T_G + (T_H + (T_I + T_J*f)*f)*f) - 273.15; - # } - K2C = 273.15 - if cf.t_coefs == "A": - calibrated_temp = ( - 1.0 - / ( - cf.t_a - + cf.t_b * np.log(cf.t_f0 / nc["temp_frequency"].to_numpy()) - + cf.t_c * np.power(np.log(cf.t_f0 / nc["temp_frequency"]), 2) - + cf.t_d * np.power(np.log(cf.t_f0 / nc["temp_frequency"]), 3) - ) - - K2C - ) - elif cf.t_coefs == "G": - calibrated_temp = ( - 1.0 - / ( - cf.t_g - + cf.t_h * np.log(cf.t_gf0 / nc["temp_frequency"].to_numpy()) - + cf.t_i * np.power(np.log(cf.t_gf0 / nc["temp_frequency"]), 2) - + cf.t_j * np.power(np.log(cf.t_gf0 / nc["temp_frequency"]), 3) - ) - - K2C - ) - else: - error_message = f"Unknown t_coefs: {cf.t_coefs}" - raise ValueError(error_message) - - return calibrated_temp - - -def _calibrated_sal_from_cond_frequency(args, combined_nc, logger, cf, nc, temp): # noqa: PLR0913 - # Comments carried over from doradosdp's processCTD.m: - # Note that recalculation of conductivity and correction for thermal mass - # are possible, however, their magnitude results in salinity differences - # of less than 10^-4. - # In other regions where these corrections are more significant, the - # corrections can be turned on. - # conductivity at S=35 psu , T=15 C [ITPS 68] and P=0 db) ==> 42.914 - sw_c3515 = 42.914 - eps = np.spacing(1) - - f_interp = interp1d( - combined_nc["depth_time"].to_numpy().tolist(), - combined_nc["depth_filtpres"].to_numpy(), - fill_value=( - combined_nc["depth_filtpres"].to_numpy()[0], - combined_nc["depth_filtpres"].to_numpy()[-1], - ), - bounds_error=False, - ) - p1 = f_interp(nc["time"].to_numpy().tolist()) - if args.plot: - pbeg = 0 - pend = len(combined_nc["depth_time"]) - if args.plot.startswith("first"): - pend = int(args.plot.split("first")[1]) - plt.figure(figsize=(18, 6)) - plt.plot( - combined_nc["depth_time"][pbeg:pend], - combined_nc["depth_filtpres"][pbeg:pend], - ":o", - nc["time"][pbeg:pend], - p1[pbeg:pend], - "o", - ) - plt.legend(("Pressure from parosci", "Interpolated to ctd time")) - title = "Comparing Interpolation of Pressure to CTD Time" - title += f" - First {pend} Points from each series" - plt.title(title) - plt.grid() - logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) - plt.show() - - # Conductivity Calculation - # cfreq=cond_frequency/1000; - # c1 = (c_a*(cfreq.^c_m)+c_b*(cfreq.^2)+c_c+c_d*TC)./(10*(1+eps*p1)); - # - # seabird25p.cc: https://bitbucket.org/mbari/dorado-auv-qnx/src/master/auv/altex/onboard/seabird25p/Seabird25p.cc - # if(*_c_coefs == 'A') { - # C = (C_A*pow(f,C_M) + C_B*f*f +C_C +C_D*t)/(10*(1+EPS*p)); - # } - # else if(*_c_coefs == 'G') { - # C = (C_G +(C_H +(C_I + C_J*f)*f)*f*f) / (10.*(1+C_TCOR*t+C_PCOR*p)) ; - # } - # else { - # Syslog::write("Seabird25p::calculate_Cond(): no c_coefs set selected.\n"); - # C=0; - # } - cfreq = nc["cond_frequency"].to_numpy() / 1000.0 - - if cf.c_coefs == "A": - calibrated_conductivity = ( - cf.c_a * np.power(cfreq, cf.c_m) - + cf.c_b * np.power(cfreq, 2) - + cf.c_c - + cf.c_d * temp.to_numpy() - ) / (10 * (1 + eps * p1)) - elif cf.c_coefs == "G": - # C = (C_G +(C_H +(C_I + C_J*f)*f)*f*f) / (10.*(1+C_TCOR*t+C_PCOR*p)) ; - calibrated_conductivity = ( - cf.c_g + (cf.c_h + (cf.c_i + cf.c_j * cfreq) * cfreq) * np.power(cfreq, 2) - ) / (10 * (1 + cf.c_tcor * temp.to_numpy() + cf.c_pcor * p1)) - else: - error_message = f"Unknown c_coefs: {cf.c_coefs}" - raise ValueError(error_message) - - # % Calculate Salinty - # cratio = c1*10/sw_c3515; % sw_C is conductivity value at 35,15,0 - # CTD.salinity = sw_salt(cratio,CTD.temperature,p1); % (psu) - # seabird25p.cc: https://bitbucket.org/mbari/dorado-auv-qnx/src/master/auv/altex/onboard/seabird25p/Seabird25p.cc - # // - # // rsm 28 Mar 07: Compute salinity from conductivity, temperature and - # // presssure: - # cndr = 10.*read_cond/sw_c3515(); - # salinity = sw_salt( cndr, read_temp, depthSensor_pres); - cratio = calibrated_conductivity * 10 / sw_c3515 - calibrated_salinity = eos80.salt(cratio, temp, p1) - - return calibrated_conductivity, calibrated_salinity - - -def _oxsat(temperature, salinity): - # - # ---------------------------------- - # Oxygen saturation: f(T,S); ml/l - # ---------------------------------- - # TK = 273.15+T; % degrees Kelvin - # A1 = -173.4292; A2 = 249.6339; A3 = 143.3483; A4 = -21.8492; - # B1 = -0.033096; B2 = 0.014259; B3 = -0.00170; - # OXSAT = exp(A1 + A2*(100./TK) + A3*log(TK/100) + A4*(TK/100) + [S .* (B1 + B2*(TK/100) + (B3*(TK/100).*(TK/100)))] ); # noqa: E501 - tk = 273.15 + temperature # degrees Kelvin - a1 = -173.4292 - a2 = 249.6339 - a3 = 143.3483 - a4 = -21.8492 - b1 = -0.033096 - b2 = 0.014259 - b3 = -0.00170 - return np.exp( - a1 - + a2 * (100 / tk) - + a3 * np.log(tk / 100) - + a4 * (tk / 100) - + np.multiply( - salinity, - b1 + b2 * (tk / 100) + np.multiply(b3 * (tk / 100), (tk / 100)), - ), - ) - - -def _calibrated_O2_from_volts( # noqa: PLR0913 - combined_nc: np.array, - cf: Coeffs, - nc: xr.Dataset, - var_name: str, - temperature: xr.DataArray, - salinity: xr.DataArray, -) -> tuple[np.array, np.array, str, str]: - # Contents of doradosdp's calc_O2_SBE43.m: - # ---------------------------------------- - # function [O2] = calc_O2_SBE43(O2V,T,S,P,O2cal,time,units); - # To calculate Oxygen from sbe voltage - # Reference: W.B. Owens and R.C. Millard, 1985. A new algorithm for CTD oxygen - # calibration, J. Phys. Oceanogr. 15:621-631. - # Also, described in SeaBird application note. - # pltit = 'n'; - # % disp([' Pressure should be in dB']); - f_interp = interp1d( - combined_nc["depth_time"].to_numpy().tolist(), - combined_nc["depth_filtpres"].to_numpy(), - fill_value=( - combined_nc["depth_filtpres"].to_numpy()[0], - combined_nc["depth_filtpres"].to_numpy()[-1], - ), - bounds_error=False, - ) - pressure = f_interp(nc["time"].to_numpy().tolist()) - - # - # ---------------------------------- - # Oxygen voltage - # ---------------------------------- - # % disp([' Minimum of oxygen voltage ' num2str(min(O2V)) ' V']); - # % disp([' Maximum of oxygen voltage ' num2str(max(O2V)) ' V']); - # % disp([' Mean of oxygen voltage ' num2str(mean(O2V)) ' V']); - # docdt = [NaN;[diff(O2V)./diff(time)]]; % slope of oxygen current (uA/sec); - docdt = np.append( - np.nan, - np.divide( - np.diff(nc[var_name]), - np.diff(nc["time"].astype(np.int64).to_numpy() / 1e9), - ), - ) - - oxsat = _oxsat(temperature, salinity) - - # Owens-Millard equation - # - # ---------------------------------- - # Oxygen concentration (mL/L) - # ---------------------------------- - # Constants - # tau=0; - # - # O2 = [O2cal.SOc * ((O2V+O2cal.offset)+(tau*docdt)) + O2cal.BOc * exp(-0.03*T)].*exp(O2cal.Tcor*T + O2cal.Pcor*P).*OXSAT; # noqa: E501 - tau = 0.0 - try: - o2_mll = np.multiply( - cf.SOc * ((nc[var_name].to_numpy() + cf.Voff) + (tau * docdt)) - + cf.BOc * np.exp(-0.03 * temperature.to_numpy()), - np.multiply( - np.exp(cf.TCor * temperature.to_numpy() + cf.PCor * pressure), - oxsat.to_numpy(), - ), - ) - except AttributeError as e: - error_message = f"Cannot calculate o2_mll: {e}" - raise ValueError(error_message) from e - - # - # if strcmp(units,'umolkg')==1 - # ---------------------------------- - # Convert to umol/kg - # ---------------------------------- - # SeaBird equations are for ml/l computations - # Can convert OXSAT at atmospheric pressure to mg/l by 1.4276 - # Convert dissolved O2 to mg/l using density of oxygen = 1.4276 kg/m^3 - # dens=sw_dens(S,T,P); - # O2 = (O2 * 1.4276) .* (1e6./(dens*32)); - dens = eos80.dens(salinity.to_numpy(), temperature.to_numpy(), pressure) - o2_umolkg = np.multiply(o2_mll * 1.4276, (1.0e6 / (dens * 32))) - - return o2_mll, o2_umolkg - - -def _calibrated_O2_from_volts_SBE43( # noqa: PLR0913 - combined_nc: np.array, - cf: Coeffs, - nc: xr.Dataset, - var_name: str, - temperature: xr.DataArray, - salinity: xr.DataArray, -) -> tuple[np.array, np.array]: - # Written to handle the seabird25p O2 sensor from the i2map vehicle - October 2023 - # - Uses Equation 1 from the SeaBird 25p manual - # - # See for example: "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files/SBE-43/2510/2014_sep/SBE 43 O2510 09Sep14.pdf" # noqa: E501 - # Soc = oxygen calibration coefficient (ml/l/V) - # V = measured voltage (V) - # Voffset = voltage offset (V) - # A = temperature compensation coefficient (1/°C) - # B = temperature compensation coefficient (1/°C) - # C = temperature compensation coefficient (1/°C) - # T = temperature (°C, ITS-90) - # E = pressure compensation coefficient (1/dbar) - # K = temperature (°K) - # P = pressure (dbar) - - f_interp = interp1d( - combined_nc["depth_time"].to_numpy().tolist(), - combined_nc["depth_filtpres"].to_numpy(), - fill_value=( - combined_nc["depth_filtpres"].to_numpy()[0], - combined_nc["depth_filtpres"].to_numpy()[-1], - ), - bounds_error=False, - ) - pressure = f_interp(nc["time"].to_numpy().tolist()) - - # Oxsol(T,S) = oxygen saturation (ml/l); P = pressure (dbar) - oxsat = _oxsat(temperature, salinity) - - # Oxygen concentration (ml/l) = Soc * (V + Voffset) * (1.0 + A * T + B * T**2 + C * T**3 ) * Oxsol(T,S) * exp(E * P / K) # noqa: E501 - o2_mll = np.multiply( - cf.Soc * (nc[var_name].to_numpy() + cf.offset), - np.multiply( - ( - 1.0 - + cf.A * temperature.to_numpy() - + cf.B * np.power(temperature.to_numpy(), 2) - + cf.C * np.power(temperature.to_numpy(), 3) - ), - np.multiply( - oxsat.to_numpy(), - np.exp(np.divide(cf.E * pressure, (273.15 + temperature.to_numpy()))), - ), - ), - ) - - # if strcmp(units,'umolkg')==1 - # ---------------------------------- - # Convert to umol/kg - # ---------------------------------- - # SeaBird equations are for ml/l computations - # Can convert OXSAT at atmospheric pressure to mg/l by 1.4276 - # Convert dissolved O2 to mg/l using density of oxygen = 1.4276 kg/m^3 - # dens=sw_dens(S,T,P); - # O2 = (O2 * 1.4276) .* (1e6./(dens*32)); - dens = eos80.dens(salinity.to_numpy(), temperature.to_numpy(), pressure) - o2_umolkg = np.multiply(o2_mll * 1.4276, (1.0e6 / (dens * 32))) - - return o2_mll, o2_umolkg - - -def _beam_transmittance_from_volts(combined_nc, nc) -> tuple[float, float]: - # ---------------------------------------------- - # From: robs - # Subject: Fwd: Merging i2MAP nav and CTD with VARS - # Date: November 14, 2022 at 10:53:04 AM PST - # To: Mike McCann - # - # Oops, I'm sorry! Apparently I sent this to myself (ah, Monday)…. - # - # Begin forwarded message: - # - # From: robs - # Subject: Re: Merging i2MAP nav and CTD with VARS - # Date: November 14, 2022 at 8:34:22 AM PST - # To: Rob Sherlock - # - # Here is the Cal-sheet for the Transmissometer if you need it: - # - # C-Star Calibration - # Date 11.25.14 - # S/N# CST-1694DR - # Pathlength 25 cm - # Analog Output Digital Output - # Vd 0.006 V 0 counts - # Vair 4.830 V 15867 counts - # Vref 4.701 V 15443 counts - - # Relationship of transmittance (Tr) to beam attenuation coefficient (c), - # and pathlength (x, in meters): Tr = exp(-c*x) - - # To determine beam transmittance: Tr = (Vsig - Vd) / (Vref - Vd) - # To determine beam attenuation coefficient: c = -1/x * ln (Tr) - - # Vd Meter output with the beam blocked. This is the offset. - # Vair Meter output in air with a clear beam path. - # Vref Meter output with clean water in the path. - # Temperature of calibration water: temperature of clean water used to obtain Vref. - # Ambient temperature: meter temperature in air during the calibration. - # Vsig Measured signal output of meter. - # - - # Hard-coded values from the calibration sheet, but when they are available - # in the .cfg file, they should be read from cf instead. - Vd = 0.006 - Vref = 4.701 - # - # Return beam transmittance (Tr) and beam attenuation coefficient (c) - Tr = (nc["transmissometer"] - Vd) / (Vref - Vd) - with np.errstate(invalid="ignore"): - c = -1 / 0.25 * np.log(Tr) - - return Tr, c - - -class SensorInfo: - pass - - -class Calibrate_NetCDF: +class Combine_NetCDF: logger = logging.getLogger(__name__) _handler = logging.StreamHandler() _handler.setFormatter(AUV_NetCDF._formatter) @@ -659,200 +204,7 @@ def global_metadata(self): return metadata - def _get_file(self, download_url, local_filename, session): - with session.get(download_url, timeout=60) as resp: - HTTP_OK = 200 - if resp.status != HTTP_OK: - self.logger.warning( - "Cannot read %s, status = %s", - download_url, - resp.status, - ) - else: - self.logger.info("Started download to %s...", local_filename) - with Path(local_filename).open("wb") as handle: - for chunk in resp.content.iter_chunked(1024): - handle.write(chunk) - if self.args.verbose > 1: - self.logger.info("%s(done)", Path(local_filename).name) - - def _define_sensor_info(self, start_datetime): - # Using lower case vehicle names, modify below for changes over time - # Used to reduce ERROR log messages for missing sensor data - self.expected_sensors = { - "dorado": [ - "navigation", - "gps", - "depth", - "ecopuck", - "hs2", - "ctd1", - "ctd2", - "isus", - "biolume", - "lopc", - "tailcone", - ], - "i2map": [ - "navigation", - "gps", - "depth", - "seabird25p", - "transmissometer", - "tailcone", - ], - } - - # Horizontal and vertical distance from origin in meters - # The origin of the x, y coordinate system is location of the - # vehicle's paroscientific depth sensor in the tailcone. - class SensorOffset(NamedTuple): - x: float - y: float - - # Original configuration of Dorado389 - Modify below with changes over time - # This code uses pandas.shift() to apply a lag to the data. Posivite lag_secs - # shifts the data forward in time to account for plumbing delays for the sensor. - # As of April 2023 only integer lag_secs are supported because of pandas.shift(). - self.sinfo = OrderedDict( - [ - ( - "navigation", - { - "data_filename": "navigation.nc", - "cal_filename": None, - "lag_secs": None, - "sensor_offset": None, - }, - ), - ( - "gps", - { - "data_filename": "gps.nc", - "cal_filename": None, - "lag_secs": None, - "sensor_offset": None, - }, - ), - ( - "depth", - { - "data_filename": "parosci.nc", - "cal_filename": None, - "lag_secs": None, - "sensor_offset": SensorOffset(-0.927, -0.076), - }, - ), - ( - "hs2", - { - "data_filename": "hydroscatlog.nc", - "cal_filename": "hs2Calibration.dat", - "lag_secs": None, - "sensor_offset": SensorOffset(0.1397, -0.2794), - }, - ), - ( - "ctd1", - { - "data_filename": "ctdDriver.nc", - "cal_filename": "ctdDriver.cfg", - "lag_secs": None, - "sensor_offset": SensorOffset(1.003, 0.0001), - }, - ), - ( - "ctd2", - { - "data_filename": "ctdDriver2.nc", - "cal_filename": "ctdDriver2.cfg", - "lag_secs": None, - "sensor_offset": SensorOffset(1.003, 0.0001), - }, - ), - ( - "seabird25p", - { - "data_filename": "seabird25p.nc", - "cal_filename": "seabird25p.cfg", - "lag_secs": None, - "sensor_offset": SensorOffset(4.04, 0.0), - }, - ), - ( - "isus", - { - "data_filename": "isuslog.nc", - "cal_filename": None, - "lag_secs": 6, - "sensor_offset": None, - }, - ), - ( - "biolume", - { - "data_filename": "biolume.nc", - "cal_filename": None, - # See Slack thread https://mbari.slack.com/archives/C04ETLY6T7V/p1682439517159249?thread_ts=1682128534.742919&cid=C04ETLY6T7V - "lag_secs": 0.5, - "sensor_offset": SensorOffset(4.04, 0.0), - # From https://bitbucket.org/messiem/matlab_libraries/src/master/ - # data_access/donnees_insitu/MBARI/AUV/charge_Dorado.m - # % UBAT flow conversion - # if time>=datenum(2010,6,29), flow_conversion=4.49E-04; - # else, flow_conversion=4.5E-04; % calibration on 2/2/2009 but unknown before # noqa: E501 - # end - # flow_conversion=flow_conversion*1E3; % using flow in mL/s - # flow1Hz=rpm*flow_conversion; - "flow_conversion": 4.5e-4 * 1e3, # conversion to mL/s - }, - ), - ( - "lopc", - { - "data_filename": "lopc.nc", - "cal_filename": None, - "lag_secs": None, - "sensor_offset": None, - }, - ), - ( - "ecopuck", - { - "data_filename": "FLBBCD2K.nc", - "cal_filename": "FLBBCD2K-3695.dev", - "lag_secs": None, - "sensor_offset": None, - }, - ), - ( - "tailcone", - { - "data_filename": "tailCone.nc", - "cal_filename": None, - "lag_secs": None, - "sensor_offset": None, - }, - ), - ], - ) - - # Changes over time - if self.args.auv_name.lower().startswith("dorado"): - self.sinfo["depth"]["sensor_offset"] = None - if start_datetime >= datetime(2007, 4, 30, tzinfo=UTC): - # First missions with 10 Gulpers: 2007.120.00 & 2007.120.01 - for instr in ("ctd1", "ctd2", "hs2", "lopc", "ecopuck", "isus"): - # TODO: Verify the length of the 10-Gulper midsection - self.sinfo[instr]["sensor_offset"] = SensorOffset(4.5, 0.0) - if start_datetime >= datetime(2014, 9, 21, tzinfo=UTC): - # First mission with 20 Gulpers: 2014.265.03 - for instr in ("ctd1", "ctd2", "hs2", "lopc", "ecopuck", "isus"): - self.sinfo[instr]["sensor_offset"] = SensorOffset(4.5, 0.0) - if start_datetime >= datetime(2010, 6, 29, tzinfo=UTC): - self.sinfo["biolume"]["flow_conversion"] = 4.49e-4 * 1e3 - - def _range_qc_combined_nc( + def _range_qc_combined_nc( # noqa: C901, PLR0912 self, instrument: str, variables: list[str], @@ -934,434 +286,6 @@ def _range_qc_combined_nc( self.combined_nc = self.combined_nc.drop_vars(qced_vars) self.logger.info("Done range checking %s", instrument) - def _read_data(self, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 - """Read in all the instrument data into member variables named by "sensor" - Access xarray.Dataset like: self.ctd.data, self.navigation.data, ... - Access calibration coefficients like: self.ctd.cals.t_f0, or as a - dictionary for hs2 data. Collect summary metadata fields that should - describe the source of the data if copied from M3. - """ - self.summary_fields = set() - for sensor, info in self.sinfo.items(): - sensor_info = SensorInfo() - orig_netcdf_filename = Path(netcdfs_dir, info["data_filename"]) - self.logger.debug( - "Reading data from %s into self.%s.orig_data", - orig_netcdf_filename, - sensor, - ) - try: - sensor_info.orig_data = xr.open_dataset( - orig_netcdf_filename, decode_timedelta=False - ) - except (FileNotFoundError, ValueError) as e: - self.logger.debug( - "%-10s: Cannot open file %s: %s", - sensor, - orig_netcdf_filename, - e, - ) - except OverflowError: - self.logger.exception( - "%-10s: Cannot open file %s", - sensor, - orig_netcdf_filename, - ) - self.logger.info( - "Perhaps _remove_bad_values() needs to be called for it in logs2netcdfs.py", - ) - if info["cal_filename"]: - cal_filename = Path(logs_dir, info["cal_filename"]) - self.logger.debug( - "Reading calibrations from %s into self.%s.cals", - orig_netcdf_filename, - sensor, - ) - if str(cal_filename).endswith(".cfg"): - try: - sensor_info.cals = self._read_cfg(cal_filename) - except FileNotFoundError as e: - self.logger.debug("%s", e) - elif str(cal_filename).endswith(".dev"): - try: - sensor_info.cals = self._read_eco_dev(cal_filename) - except FileNotFoundError as e: - self.logger.debug("%s", e) - - setattr(self, sensor, sensor_info) - if hasattr(sensor_info, "orig_data"): - try: - self.summary_fields.add( - getattr(self, sensor).orig_data.attrs["summary"], - ) - except KeyError: - self.logger.warning("%s: No summary field", orig_netcdf_filename) - - # TODO: Warn if no data found and if logs2netcdfs.py should be run - - def _read_cfg(self, cfg_filename): - """Emulate what get_auv_cal.m and processCTD.m do in the - Matlab doradosdp toolbox - """ - self.logger.debug("Opening %s", cfg_filename) - coeffs = Coeffs() - # Default for non-i2map data - coeffs.t_coefs = "A" - coeffs.c_coefs = "A" - with Path(cfg_filename).open() as fh: - for line in fh: - ##self.logger.debug(line) - if line.startswith("//"): - continue - # From get_auv_cal.m - Handle CTD calibration parameters - if line[:2] in ( - "t_", - "c_", - "ep", - "SO", - "BO", - "Vo", - "TC", - "PC", - "Sc", - "Da", - ): - coeff, value = (s.strip() for s in line.split("=")) - try: - self.logger.debug("Saving %s", line) - # Like in Seabird25p.cc use ?_coefs to determine which - # calibration scheme to use for i2map data - if coeff in {"t_coefs", "c_coefs"}: - setattr(coeffs, coeff, str(value.split(";")[0])) - else: - setattr(coeffs, coeff, float(value.split(";")[0])) - except ValueError as e: - self.logger.debug("%s", e) - return coeffs - - def _cal_date_xml_files( - self, - sensor_dir: str, - cal_date_dirs: list, - serial_number: int, - ) -> dict: - cal_date_xml_files = {} - for cal_date_dir in cal_date_dirs: - find_cmd = f'find "{Path(sensor_dir, cal_date_dir)}" -iname "*.xml"' - self.logger.debug("Executing %s", find_cmd) - import subprocess - - safe_sensor_dir = Path(sensor_dir).resolve() - safe_cal_date_dir = Path(sensor_dir, cal_date_dir).resolve() - - find_cmd = f'find "{safe_sensor_dir}" "{safe_cal_date_dir}" -iname "*.xml"' - if not safe_sensor_dir.is_dir() or not safe_cal_date_dir.is_dir(): - error_message = "Invalid directory paths provided." - raise ValueError(error_message) - if not safe_sensor_dir.is_dir() or not safe_cal_date_dir.is_dir(): - error_message = "Invalid directory paths provided." - raise ValueError(error_message) - result = subprocess.run( # noqa: S603 - shlex.split(find_cmd), # noqa: S603 - capture_output=True, - text=True, - check=True, - ) - xml_files = [x for x in result.stdout.split("\n") if x] - if len(xml_files) == 0: - self.logger.debug( - "Cannot find %s.xml in %s/%s", - serial_number, - sensor_dir, - cal_date_dir, - ) - continue - if len(xml_files) > 1: - self.logger.warning( - "Found %d xml files in %s/%s", - len(xml_files), - sensor_dir, - cal_date_dir, - ) - self.logger.info("{xml_files}") - cal_xml_filename = xml_files[0] - - # The .xml file looks like: - # - # - # 2510 - # 06-May-22 - # 1 - # - # - # 0.0000 - # 0.0000e+000 - # .... - try: - root = ET.parse(cal_xml_filename).getroot() - except ET.ParseError as e: - self.logger.warning( - "Cannot parse %s: %s", - cal_xml_filename, - e, - ) - continue - try: - cal_date = datetime.strptime( - root.find("CalibrationDate").text, - "%d-%b-%y", - ).replace(tzinfo=UTC) - except ValueError as e: - self.logger.warning( - "Cannot parse CalibrationDate, %s", - root.find("CalibrationDate").text, - ) - # "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files/SBE-43/143/2011_June/Oxygen_SBE43_0143.XML" # noqa: E501 - # has: 08-Jun-11p - if root.find("CalibrationDate").text.endswith("p"): - self.logger.info("Trying to parse CalibrationDate without 'p'") - cal_date = datetime.strptime( - root.find("CalibrationDate").text[:-1], - "%d-%b-%y", - ).replace(tzinfo=UTC) - else: - error_message = ( - f"Cannot parse CalibrationDate {root.find('CalibrationDate').text}" - ) - raise ValueError(error_message) from e - cal_date_xml_files[cal_date] = cal_xml_filename - - return OrderedDict(sorted(cal_date_xml_files.items())) - - def _read_oxy_coeffs( # noqa: C901, PLR0912, PLR0915 - self, - cfg_filename: Path, - portstbd: str = "", - ) -> tuple[Coeffs, str]: - """Based on the serial number found as a comment in the .cfg file find - the approriate calibration coefficients for the oxygen sensor within the - '/DMO/MDUC_CORE_CTD_200103/Calibration Files' shared drive folder. - portstbd is either "", "port" or "stbd". - """ - # For i2map .cfg file lines look like: - # //OxygenSerialNumber = 2510; - # //note - this is the sensor in line with the C & T sensors. Goes to voltage channel 3 - # - # //OxygenSerialNumber = 3968; - # //note - this sensor is installed on the stbd side of the vehicle in line with the - # // transmissometer. Goes to voltage channel 5 - # //note - seabird has adopted a new DO calibration with a polynomial for temp correction - # //A = -3.0812e-003 - # //B = 7.8442e-005 - # //C = -9.0601e-007 - # //E = 0.036 - # SOc = 0.4466; - # BOc = 0.0000; - # Voff = -0.5070; - # TCor = -0.0000; - # PCor = 1.3500e-04; //not given in new calibration sheet - - # Read from .cfg file to get the serial numbers of the oxygen sensors - self.logger.debug("Opening %s", cfg_filename) - coeffs = Coeffs() - - portstbd_order = { - "port": 0, - "stbd": 1, - } # Typical order of oxygen sensors in seabird25p.cfg file - with cfg_filename.open() as fh: - sensor_count = 0 - serial_numbers = [] - for line in fh: - self.logger.debug(line) - if line.startswith("//OxygenSerialNumber = "): - serial_numbers.append(int(line.split()[-1].strip(";"))) - sensor_count += 1 - if len(serial_numbers) == 0: - error_message = f"No oxygen sensor serial number found in {cfg_filename}" - raise ValueError(error_message) - if len(serial_numbers) > 2: # noqa: PLR2004 - error_message = f"More than 2 oxygen sensor serial numbers found in {cfg_filename}" - raise ValueError(error_message) - if portstbd: - serial_number = serial_numbers[portstbd_order[portstbd]] - self.logger.info( - "Looking for calibration file for O2 sensor serial number %s on %s side", - serial_number, - portstbd, - ) - elif len(serial_numbers) == 1: - self.logger.info( - "Looking for calibration file for O2 sensor serial number %s", - serial_numbers[0], - ) - serial_number = serial_numbers[0] - else: - error_message = ( - f"Multiple oxygen sensor serial numbers found in {cfg_filename} " - "with no port or stbd specified" - ) - raise ValueError(error_message) - - # Find the calibration file for the oxygen sensor - self.logger.debug( - "Finding calibration file for oxygen serial number = %s on mission %s", - serial_number, - self.args.mission, - ) - - safe_calibration_dir = Path(self.calibration_dir).resolve() - if not safe_calibration_dir.is_dir(): - error_message = f"Calibration directory '{self.calibration_dir}' does not exist" - raise LookupError(error_message) - find_cmd = f'find "{safe_calibration_dir}" -name "{serial_number}"' - self.logger.info("Executing: %s ", find_cmd) - safe_find_cmd = shlex.split(find_cmd) - sensor_dir = subprocess.run( # noqa: S603 - safe_find_cmd, # noqa: S603 - capture_output=True, - text=True, - check=True, - ).stdout.strip() - self.logger.debug("%s", sensor_dir) - - safe_sensor_dir = Path(sensor_dir).resolve() - if not safe_sensor_dir.is_dir(): - error_message = f"Sensor directory '{sensor_dir}' does not exist" - raise LookupError(error_message) - # Find only the direct child directories: https://stackoverflow.com/a/20103980 - # Unable to use subprocess.run() with find an "*" in the command, apparently - dir_find_cmd = f'find "{safe_sensor_dir}"/* -maxdepth 0 -type d' - self.logger.debug("Executing: dir_find_cmd = %s", dir_find_cmd) - cal_date_dirs = [x.split("/")[-1] for x in os.popen(dir_find_cmd).read().split("\n") if x] # noqa: S605 - self.logger.info("Found calibration date dirs: %s", " ".join(cal_date_dirs)) - cal_dates = self._cal_date_xml_files(sensor_dir, cal_date_dirs, serial_number) - mission_start = self.seabird25p.orig_data.cf["time"].to_numpy()[0] - cal_date_to_use = next(iter(cal_dates)) # Default to first calibration date - for cal_date in cal_dates: - # Find the most recent calibration date just before the mission start - self.logger.debug( - "Comparing cal_date=%s with mission_start=%s", cal_date, mission_start - ) - self.logger.info( - "File %s has CalibrationDate %s", - cal_dates[cal_date], - cal_date, - ) - if np.datetime64(cal_date.replace(tzinfo=None)) > mission_start: - self.logger.info( - "Breaking from loop as %s is after %s with mission_start=%s", - cal_dates[cal_date], - self.args.mission, - mission_start, - ) - break - cal_date_to_use = cal_date - - if np.datetime64(cal_date_to_use.replace(tzinfo=None)) < mission_start: - self.logger.info( - "File %s is just before %s with mission_start=%s", - cal_dates[cal_date_to_use], - self.args.mission, - mission_start, - ) - else: - self.logger.info( - "File %s is the first calibration file, but is after %s with mission_start=%s", - cal_dates[cal_date_to_use], - self.args.mission, - mission_start, - ) - - # Read the calibration coefficients from the .cal file which looks like: - # INSTRUMENT_TYPE=SBE43 - # SERIALNO=2510 - # OCALDATE=09-Sep-14 - # SOC= 4.533809e-001 - # VOFFSET=-5.191352e-001 - # A=-5.251956e-003 - # B= 2.762519e-004 - # C=-4.164687e-006 - # E= 3.600000e-002 - # Tau20= 1.030000e+000 - - # parse the .xml file to get the "equation 1" calibration coefficients: - # - # - # 5.0544e-001 - # -0.5124 - # -4.8460e-003 - # 2.2670e-004 - # -3.2013e-006 - # 2.5826e+000 - # 1.92634e-004 - # -4.64803e-002 - # 3.6000e-002 - # 1.5600 - #

-3.3000e-002

- #

5.0000e+003

- #

1.4500e+003

- #
- root = ET.parse(cal_dates[cal_date_to_use]).getroot() - cal_xml_serial_number = int(root.find("SerialNumber").text) - if cal_xml_serial_number != serial_number: - self.logger.warning( - "Serial number in %s = %s does not match %s", - cal_dates[cal_date_to_use], - cal_xml_serial_number, - serial_number, - ) - for elem in root.findall("CalibrationCoefficients[@equation]"): - if elem.attrib["equation"] == "1": - eq1 = elem - for child in eq1: - try: - setattr(coeffs, child.tag, float(child.text)) - except ValueError: - setattr(coeffs, child.tag, child.text) - - return coeffs, cal_dates[cal_date_to_use] - - def _read_eco_dev(self, dev_filename): - """Read calibration information from the file associated with the - ecopuck log data. The number match what are in the cal sheets in - https://bitbucket.org/messiem/auv-biolum/src/master/DATA/sensors%20%26%20calibration/FLBBCD2K_Dorado/ - - As of 13 January 2023 the contents of all the FLBBCD2K-3695.dev files are the same: - ECO FLBBCD2K-3695 - Created on: 10/29/2014 - - COLUMNS=9 - N/U=1 - N/U=2 - N/U=3 - CHL=4 0.0073 45 - N/U=5 - Lambda=6 1.633E-06 46 700 700 - N/U=7 - CDOM=8 0.0909 45 - N/U=9 - """ - # Read the calibration coefficients from the .dev file, in case they change - coeffs = Coeffs() - with dev_filename.open() as fh: - for line in fh: - if line.startswith("CHL"): - # CHL (μg/l) = Scale Factor * (Output - Dark counts) - coeffs.chl_scale_factor = float(line.split()[1]) - coeffs.chl_dark_counts = float(line.split()[2]) - elif line.startswith("Lambda"): - # From Scattering Meter Calibration Sheet - wavelength 700 nm - # "Lambda" == "bbp700" ? - # β(θc) m-1 sr-1 = Scale Factor x (Output - Dark Counts) - coeffs.bbp700_scale_factor = float(line.split()[1]) - coeffs.bbp700_dark_counts = float(line.split()[2]) - elif line.startswith("CDOM"): - # CDOM (ppb) = Scale Factor x (Output - Dark Counts) - coeffs.cdom_scale_factor = float(line.split()[1]) - coeffs.cdom_dark_counts = float(line.split()[2]) - return coeffs - def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 # AUV navigation data, which comes from a process on the vehicle that # integrates data from several instruments. We use it to grab the DVL @@ -1837,1054 +761,75 @@ def _gps_process(self, sensor): "comment": "Dead reckoned longitude nudged to GPS positions", } - def _depth_process(self, sensor, latitude=36, cutoff_freq=1): # noqa: PLR0915 - """Depth data (from the Parosci) is 10 Hz - Use a butterworth window - to filter recorded pressure to values that are appropriately sampled - at 1 Hz (when matched with other sensor data). cutoff_freq is in - units of Hz. + def _apply_plumbing_lag( + self, + sensor: str, + time_index: pd.DatetimeIndex, + time_name: str, + ) -> tuple[xr.DataArray, str]: """ - try: - orig_nc = getattr(self, sensor).orig_data - except (FileNotFoundError, AttributeError) as e: - self.logger.debug("Original data not found for %s: %s", sensor, e) - return - - # Remove non-monotonic times - self.logger.debug("Checking for non-monotonic increasing times") - monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) - if (~monotonic).any(): - self.logger.debug( - "Removing non-monotonic increasing times at indices: %s", - np.argwhere(~monotonic).flatten(), - ) - orig_nc = orig_nc.sel(time=monotonic) - - depths = orig_nc["depth"] - # Remove egregious outliers before filtering seen form 2008 through 2012 - # ad hoc corrections for depth after testing stoqs_all_dorado load in July 2023 - mission_depth_ranges = { - "2006.054.00": Range(-1, 150), # Soquel Canyon - "2007.120.00": Range(-0.5, 32), # Shallow N. Monterey Bay - "2007.120.01": Range(-0.5, 32), # Shallow N. Monterey Bay - "2007.123.05": Range(-0.5, 32), # Shallow N. Monterey Bay - "2008.281.03": Range(-1, 30), # Shallow (< 30 m depth ) Soquel Bight - "2009.084.02": Range(-1, 60), # Diamond - lots of bad depths - "2009.085.02": Range(-1, 60), # Monterey Bay - lots of bad depths - "2009.112.07": Range(-1, 30), # Shallow Monterey Bay - "2009.113.00": Range(-1, 30), # Shallow Monterey Bay - "2009.154.00": Range(-1, 50), # Shallow Monterey Bay - "2009.155.03": Range(-1, 50), # Shallow Monterey Bay - "2009.272.00": Range(-1, 40), # Shallow Monterey Bay - "2010.118.00": Range(-1, 260), # Monterey Canyon transect - "2010.181.01": Range(-0.5, 22), # Shallow N. Monterey Bay - "2010.181.02": Range(-0.5, 22), # Shallow N. Monterey Bay - # ESP drifter missions out at station 67-70 with Flyer doing casts and ESP - # drifting south toward Davidson Seamount - no gulpers (Frederic sent me note about survey grouping) # noqa: E501 - # Faulty parosci lead to several mission depth aborts at beginning of this set of volume surveys # noqa: E501 - "2010.258.00": Range(-1, 110), # Offshore CANON 2010 - "2010.258.01": Range(-1, 110), # Offshore CANON 2010 - "2010.258.02": Range(-1, 110), # Offshore CANON 2010 - "2010.258.03": Range(-1, 110), # Offshore CANON 2010 - "2010.258.04": Range(-1, 110), # Offshore CANON 2010 - "2010.259.01": Range(-1, 110), # Offshore CANON 2010 - "2010.259.02": Range(-1, 110), # Offshore CANON 2010 - "2011.061.00": Range(-1, 50), # Shallow Monterey Bay - "2011.171.01": Range(-1, 55), # Shallow Monterey Bay - "2011.250.01": Range(-1, 60), # Shallow Monterey Bay - "2011.263.00": Range(-1, 30), # Shallow Monterey Bay - "2011.285.01": Range(-1, 25), # Shallow Monterey Bay - "2012.258.00": Range(-1, 160), # Shallow Monterey Bay - "2012.270.04": Range(-1, 30), # Shallow Monterey Bay - } - if self.args.mission in mission_depth_ranges: - valid_depth_range = mission_depth_ranges[self.args.mission] - self.logger.info( - "Removing depths outside of valid_depth_range=%s for self.args.mission=%s", - valid_depth_range, - self.args.mission, - ) - out_of_range = np.where( - (depths < valid_depth_range.min) | (depths > valid_depth_range.max), - )[0] - self.logger.debug( - "depths: %d out of range values = %s", - len(depths[out_of_range].to_numpy()), - depths[out_of_range].to_numpy(), - ) - self.logger.info("Setting %d depths values to NaN", len(out_of_range)) - depths[out_of_range] = np.nan - depths = depths.dropna("time", how="all") - - # From initial CVS commit in 2004 the processDepth.m file computed - # pres from depth this way. I don't know what is done on the vehicle - # side where a latitude of 36 is not appropriate: GoM, SoCal, etc. - self.logger.debug("Converting depth to pressure using latitude = %s", latitude) - pres = eos80.pres(depths, latitude) - - # See https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.signal.filtfilt.html#scipy.signal.filtfilt - # and https://docs.scipy.org/doc/scipy-1.0.0/reference/generated/scipy.signal.butter.html#scipy.signal.butter - # Sample rate should be 10 - calcuate it to be sure - sample_rate = 1.0 / np.round( - np.mean(np.diff(depths["time"])) / np.timedelta64(1, "s"), - decimals=2, - ) - if sample_rate != 10: # noqa: PLR2004 - self.logger.warning( - "Expected sample_rate to be 10 Hz, instead it's %.2f Hz", - sample_rate, - ) - - # The Wn parameter for butter() is fraction of the Nyquist frequency - Wn = cutoff_freq / (sample_rate / 2.0) - b, a = signal.butter(8, Wn) - try: - depth_filtpres_butter = signal.filtfilt(b, a, pres) - except ValueError as e: - error_message = "Likely short or empty file" - raise EOFError(error_message) from e - depth_filtdepth_butter = signal.filtfilt(b, a, depths) - - # Use 10 points in boxcar as in processDepth.m - a = 10 - b = signal.windows.boxcar(a) - depth_filtpres_boxcar = signal.filtfilt(b, a, pres) - pres_plot = True # Set to False for debugging other plots - if self.args.plot and pres_plot: - # Use Pandas to plot multiple columns of data - # to validate that the filtering works as expected - pbeg = 0 - pend = len(depths.get_index("time")) - if self.args.plot.startswith("first"): - pend = int(self.args.plot.split("first")[1]) - df_plot = pd.DataFrame(index=depths.get_index("time")[pbeg:pend]) - df_plot["pres"] = pres[pbeg:pend] - df_plot["depth_filtpres_butter"] = depth_filtpres_butter[pbeg:pend] - df_plot["depth_filtpres_boxcar"] = depth_filtpres_boxcar[pbeg:pend] - title = ( - f"First {pend} points from" - f" {self.args.mission}/{self.sinfo[sensor]['data_filename']}" - ) - ax = df_plot.plot(title=title, figsize=(18, 6)) - ax.grid("on") - self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) - plt.show() - - depth_filtdepth = xr.DataArray( - depth_filtdepth_butter, - coords=[depths.get_index("time")], - dims={"depth_time"}, - name="depth_filtdepth", + Apply plumbing lag to a time index in the combined netCDF file. + """ + # Convert lag_secs to milliseconds as np.timedelta64 neeeds an integer + lagged_time = time_index - np.timedelta64( + int(self.sinfo[sensor]["lag_secs"] * 1000), + "ms", ) - depth_filtdepth.attrs = { - "long_name": "Filtered Depth", - "standard_name": "depth", - "units": "m", - "comment": ( - f"Original {sample_rate:.3f} Hz depth data filtered using" - f" Butterworth window with cutoff frequency of {cutoff_freq} Hz" - ), - } - - depth_filtpres = xr.DataArray( - depth_filtpres_butter, - coords=[depths.get_index("time")], - dims={"depth_time"}, - name="depth_filtpres", + # Need to update the sensor's time coordinate in the combined netCDF file + # so that DataArrays created with lagged_time fit onto the coordinate + self.combined_nc.coords[f"{sensor}_{time_name}"] = xr.DataArray( + lagged_time, + coords=[lagged_time], + dims={f"{sensor}_{time_name}"}, + name=f"{sensor}_{time_name}", ) - depth_filtpres.attrs = { - "long_name": "Filtered Pressure", - "standard_name": "sea_water_pressure", - "units": "dbar", - "comment": ( - f"Original {sample_rate:.3f} Hz pressure data filtered using" - f" Butterworth window with cutoff frequency of {cutoff_freq} Hz" - ), - } - - self.combined_nc["depth_filtdepth"] = depth_filtdepth - self.combined_nc["depth_filtpres"] = depth_filtpres + lag_info = f"with plumbing lag correction of {self.sinfo[sensor]['lag_secs']} seconds" + return lagged_time, lag_info - def _hs2_process(self, sensor, logs_dir): # noqa: C901, PLR0912, PLR0915 + def _biolume_process(self, sensor): try: orig_nc = getattr(self, sensor).orig_data - except (FileNotFoundError, AttributeError) as e: - self.logger.debug("Original data not found for %s: %s", sensor, e) + except FileNotFoundError as e: + self.logger.error("%s", e) # noqa: TRY400 return + except AttributeError: + error_message = ( + f"{sensor} has no orig_data - likely a missing or zero-sized .log file" + f" in {Path(MISSIONLOGS, self.args.mission)}" + ) + raise EOFError(error_message) from None # Remove non-monotonic times - self.logger.debug("Checking for non-monotonic increasing times") + self.logger.debug("Checking for non-monotonic increasing time") monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) if (~monotonic).any(): self.logger.debug( - "Removing non-monotonic increasing times at indices: %s", + "Removing non-monotonic increasing time at indices: %s", np.argwhere(~monotonic).flatten(), ) - orig_nc = orig_nc.sel(time=monotonic) - - try: - cal_fn = Path(logs_dir, self.sinfo["hs2"]["cal_filename"]) - cals = hs2_read_cal_file(cal_fn) - except FileNotFoundError as e: - self.logger.warning("Cannot process HS2 data: %s", e) - return - - hs2 = hs2_calc_bb(orig_nc, cals) - - source = self.sinfo[sensor]["data_filename"] - coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" - - # Blue backscatter - if hasattr(hs2, "bbp420"): - blue_bs = xr.DataArray( - hs2.bbp420.to_numpy(), - coords=[hs2.bbp420.get_index("time")], - dims={"hs2_time"}, - name="hs2_bbp420", - ) - blue_bs.attrs = { - "long_name": "Particulate backscattering coefficient at 420 nm", - "coordinates": coord_str, - "units": "m-1", - "comment": (f"Computed by hs2_calc_bb() from data in {source}"), - } - if hasattr(hs2, "bbp470"): - blue_bs = xr.DataArray( - hs2.bbp470.to_numpy(), - coords=[hs2.bbp470.get_index("time")], - dims={"hs2_time"}, - name="hs2_bbp470", - ) - blue_bs.attrs = { - "long_name": "Particulate backscattering coefficient at 470 nm", - "coordinates": coord_str, - "units": "m-1", - "comment": (f"Computed by hs2_calc_bb() from data in {source}"), - } - - # Red backscatter - if hasattr(hs2, "bbp676"): - red_bs = xr.DataArray( - hs2.bbp676.to_numpy(), - coords=[hs2.bbp676.get_index("time")], - dims={"hs2_time"}, - name="hs2_bbp676", - ) - red_bs.attrs = { - "long_name": "Particulate backscattering coefficient at 676 nm", - "coordinates": coord_str, - "units": "m-1", - "comment": (f"Computed by hs2_calc_bb() from data in {source}"), - } - if hasattr(hs2, "bbp700"): - red_bs = xr.DataArray( - hs2.bbp700.to_numpy(), - coords=[hs2.bbp700.get_index("time")], - dims={"hs2_time"}, - name="hs2_bbp700", - ) - red_bs.attrs = { - "long_name": "Particulate backscattering coefficient at 700 nm", - "coordinates": coord_str, - "units": "m-1", - "comment": (f"Computed by hs2_calc_bb() from data in {source}"), - } - - # Fluorescence - if hasattr(hs2, "fl676"): - fl676 = xr.DataArray( - hs2.fl676.to_numpy(), - coords=[hs2.fl676.get_index("time")], - dims={"hs2_time"}, - name="hs2_fl676", - ) - fl676.attrs = { - "long_name": "Fluorescence at 676 nm", - "coordinates": coord_str, - "comment": (f"Computed by hs2_calc_bb() from data in {source}"), - } - fl = fl676 - if hasattr(hs2, "fl700"): - fl700 = xr.DataArray( - hs2.fl700.to_numpy(), - coords=[hs2.fl700.get_index("time")], - dims={"hs2_time"}, - name="hs2_fl700", - ) - fl700.attrs = { - "long_name": "Fluorescence at 700 nm", - "coordinates": coord_str, - "comment": (f"Computed by hs2_calc_bb() from data in {source}"), - } - fl = fl700 - - # Zeroth level quality control - same as in legacy Matlab - mblue = np.ma.masked_invalid(blue_bs) - mblue = np.ma.masked_greater(mblue, 0.1) - mred = np.ma.masked_invalid(red_bs) - mred = np.ma.masked_greater(mred, 0.1) - mfl = np.ma.masked_invalid(fl) - mfl = np.ma.masked_greater(mfl, 0.02) - - bad_hs2 = [ - f"{b}, {r}, {f}" - for b, r, f in zip( - blue_bs.to_numpy()[:][mblue.mask], - red_bs.to_numpy()[:][mred.mask], - fl.to_numpy()[:][mfl.mask], - strict=False, - ) - ] + orig_nc = orig_nc.sel({TIME: monotonic}) - if bad_hs2: + self.logger.info("Checking for non-monotonic increasing %s", TIME60HZ) + monotonic = monotonic_increasing_time_indices(orig_nc.get_index(TIME60HZ)) + if (~monotonic).any(): self.logger.info( - "Number of bad %s points: %d of %d", - sensor, - len(blue_bs.to_numpy()[:][mblue.mask]), - len(blue_bs), - ) - self.logger.debug( - "Removing bad %s points (indices, (blue, red, fl)): %s, %s", - sensor, - np.where(mred.mask)[0], - bad_hs2, - ) - blue_bs = blue_bs[:][~mblue.mask] - red_bs = red_bs[:][~mfl.mask] - - red_blue_plot = True # Set to False for debugging other plots - if self.args.plot and red_blue_plot: - # Use Pandas to more easiily plot multiple columns of data - pbeg = 0 - pend = len(blue_bs.get_index("hs2_time")) - if self.args.plot.startswith("first"): - pend = int(self.args.plot.split("first")[1]) - df_plot = pd.DataFrame(index=blue_bs.get_index("hs2_time")[pbeg:pend]) - df_plot["blue_bs"] = blue_bs[pbeg:pend] - df_plot["red_bs"] = red_bs[pbeg:pend] - ## df_plot["fl"] = fl[pbeg:pend] - title = ( - f"First {pend} points from" - f" {self.args.mission}/{self.sinfo[sensor]['data_filename']}" + "Removing non-monotonic increasing %s at indices: %s", + TIME60HZ, + np.argwhere(~monotonic).flatten(), ) - ax = df_plot.plot(title=title, figsize=(18, 6), ylim=(-0.003, 0.004)) - ax.grid("on") - self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) - plt.show() - - # Save blue, red, & fl to combined_nc, also - if hasattr(hs2, "bbp420"): - self.combined_nc["hs2_bbp420"] = blue_bs - if hasattr(hs2, "bbp470"): - self.combined_nc["hs2_bbp470"] = blue_bs - if hasattr(hs2, "bbp676"): - self.combined_nc["hs2_bbp676"] = red_bs - if hasattr(hs2, "bbp700"): - self.combined_nc["hs2_bbp700"] = red_bs - if hasattr(hs2, "fl676"): - self.combined_nc["hs2_fl676"] = fl - if hasattr(hs2, "fl700"): - self.combined_nc["hs2_fl700"] = fl - - # For missions before 2009.055.05 hs2 will have attributes like bbp470, bbp676, and fl676 - # Hobilabs modified the instrument in 2009 to now give: bbp420, bbp700, and fl700, - # apparently giving a better measurement of chlorophyll. - # - # Detect the difference in this code and keep the member names descriptive in the survey - # data so the the end user knows the difference. + orig_nc = orig_nc.sel({TIME60HZ: monotonic}) - # Align Geometry, correct for pitch self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( sensor, orig_nc, ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" - self.combined_nc[f"{sensor}_depth"].attrs = { - "long_name": "Depth", - "units": "m", - "comment": ( - f"Variable depth_filtdepth from {out_fn} linearly interpolated" - f" to {sensor}_time and corrected for pitch using" - f" {self.sinfo[sensor]['sensor_offset']}" - ), - } - - # Coordinates latitude & longitude are interpolated to the sensor time - # in the align.py code. Here we add the sensor depths as this is where - # the sensor offset is applied with _geometric_depth_correction(). - def _calibrated_oxygen( # noqa: PLR0913 - self, - logs_dir, - sensor, - cf, - orig_nc, - var_name, - temperature, - salinity, - portstbd="", - ) -> tuple[xr.DataArray, xr.DataArray]: - """Calibrate oxygen data, returning DataArrays.""" - - if sensor == "seabird25p": - cf, cal_file = self._read_oxy_coeffs( - Path(logs_dir, self.sinfo[sensor]["cal_filename"]), - portstbd, - ) - ( - oxy_mll, - oxy_umolkg, - ) = _calibrated_O2_from_volts_SBE43( - self.combined_nc, - cf, - orig_nc, - var_name, - temperature, - salinity, - ) - mll_comment = ( - f"Derived from {var_name} from {sensor}.nc and eq 1 calibration coefficients " - f"{vars(cf)} from {cal_file = }" - ) - umolkg_comment = ( - f"Computed from oxygen_mll_{portstbd} with " - "'np.multiply(o2_mll * 1.4276, (1.0e6 / (dens * 32)))'" - ) - self.logger.info("%s: parsed from %s file: %s", var_name, cal_file, vars(cf)) - else: - ( - oxy_mll, - oxy_umolkg, - ) = _calibrated_O2_from_volts( - self.combined_nc, - cf, - orig_nc, - var_name, - temperature, - salinity, - ) - mll_comment = ( - f"Derived from {var_name} from {sensor}.nc using calibration " - f"coefficients {vars(cf)}" - ) - umolkg_comment = ( - "Computed from oxygen_mll with " - "'np.multiply(o2_mll * 1.4276, (1.0e6 / (dens * 32)))'" - ) - oxygen_mll = xr.DataArray( - oxy_mll, + source = self.sinfo[sensor]["data_filename"] + self.combined_nc["biolume_flow"] = xr.DataArray( + orig_nc["flow"].to_numpy() * self.sinfo["biolume"]["flow_conversion"], coords=[orig_nc.get_index("time")], dims={f"{sensor}_time"}, - name="oxygen_mll" + portstbd, - ) - oxygen_mll.attrs = { - "long_name": "Dissolved Oxygen", - "units": "ml/l", - "comment": mll_comment, - } - - oxygen_umolkg = xr.DataArray( - oxy_umolkg, - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="oxygen_umolkg" + portstbd, - ) - oxygen_umolkg.attrs = { - "long_name": "Dissolved Oxygen", - "units": "umol/kg", - "comment": umolkg_comment, - } - return oxygen_mll, oxygen_umolkg - - def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 - # Don't be put off by the length of this method. - # It's lengthy because of all the possible netCDF variables and - # attribute metadata that need to be added to the combined_nc. - try: - orig_nc = getattr(self, sensor).orig_data - except FileNotFoundError as e: - self.logger.exception("%s", e) # noqa: TRY401 - return - except AttributeError: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) from None - - # Remove non-monotonic times - self.logger.debug("Checking for non-monotonic increasing times") - monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) - if (~monotonic).any(): - self.logger.debug( - "Removing non-monotonic increasing times at indices: %s", - np.argwhere(~monotonic).flatten(), - ) - orig_nc = orig_nc.sel(time=monotonic) - - # Need to do this zeroth-level QC to calibrate temperature - orig_nc["temp_frequency"][orig_nc["temp_frequency"] == 0.0] = np.nan - source = self.sinfo[sensor]["data_filename"] - - # === Temperature and salinity variables === - # Seabird specific calibrations - vars_to_qc = [] - self.logger.debug("Calling _calibrated_temp_from_frequency()") - temperature = xr.DataArray( - _calibrated_temp_from_frequency(cf, orig_nc), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="temperature", - ) - temperature.attrs = { - "long_name": "Temperature", - "standard_name": "sea_water_temperature", - "units": "degree_Celsius", - "comment": ( - f"Derived from temp_frequency from {source} via calibration parms: {cf.__dict__}" - ), - } - self.combined_nc[f"{sensor}_temperature"] = temperature - - self.logger.debug("Calling _calibrated_sal_from_cond_frequency()") - cal_conductivity, cal_salinity = _calibrated_sal_from_cond_frequency( - self.args, - self.combined_nc, - self.logger, - cf, - orig_nc, - temperature, - ) - conductivity = xr.DataArray( - cal_conductivity, - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="conductivity", - ) - conductivity.attrs = { - "long_name": "Conductivity", - "standard_name": "sea_water_conductivity", - "units": "Siemens/meter", - "comment": ( - f"Derived from cond_frequency from {source} via calibration parms: {cf.__dict__}" - ), - } - self.combined_nc[f"{sensor}_conductivity"] = conductivity - vars_to_qc.append(f"{sensor}_salinity") - salinity = xr.DataArray( - cal_salinity, - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="salinity", - ) - salinity.attrs = { - "long_name": "Salinity", - "standard_name": "sea_water_salinity", - "units": "", - "comment": ( - f"Derived from cond_frequency from {source} via calibration parms: {cf.__dict__}" - ), - } - self.combined_nc[f"{sensor}_salinity"] = salinity - - # Variables computed onboard the vehicle that are recomputed here - self.logger.debug("Collecting temperature_onboard") - temperature_onboard = xr.DataArray( - orig_nc["temperature"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="temperature_onboard", - ) - # Onboard software sets bad values to absolute zero - replace with NaN - temperature_onboard[temperature_onboard <= -273] = np.nan # noqa: PLR2004 - temperature_onboard.attrs = { - "long_name": "Temperature computed onboard the vehicle", - "units": "degree_Celsius", - "comment": ( - "Temperature computed onboard the vehicle from" - " calibration parameters installed on the vehicle" - " at the time of deployment." - ), - } - self.combined_nc[f"{sensor}_temperature_onboard"] = temperature_onboard - - self.logger.debug("Collecting conductivity_onboard") - conductivity_onboard = xr.DataArray( - orig_nc["conductivity"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="conductivity_onboard", - ) - conductivity_onboard.attrs = { - "long_name": "Conductivity computed onboard the vehicle", - "units": "Siemens/meter", - "comment": ( - "Temperature computed onboard the vehicle from" - " calibration parameters installed on the vehicle" - " at the time of deployment." - ), - } - self.combined_nc[f"{sensor}_conductivity_onboard"] = conductivity_onboard - - if "salinity" in orig_nc: - self.logger.debug("Collecting salinity_onboard") - salinity_onboard = xr.DataArray( - orig_nc["salinity"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="salinity_onboard", - ) - salinity_onboard.attrs = { - "long_name": "Salinity computed onboard the vehicle", - "units": "", - "comment": ( - "Salinity computed onboard the vehicle from" - " calibration parameters installed on the vehicle" - " at the time of deployment." - ), - } - self.combined_nc[f"{sensor}_salinity_onboard"] = salinity_onboard - - # === Oxygen variables === - # original values in units of volts - self.logger.debug("Collecting dissolvedO2") - try: - dissolvedO2 = xr.DataArray( - orig_nc["dissolvedO2"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="dissolvedO2", - ) - dissolvedO2.attrs = { - "long_name": "Dissolved Oxygen sensor", - "units": "Volts", - "comment": ("Analog Voltage Channel 6 - to be converted to umol/kg"), - } - self.combined_nc[f"{sensor}_dissolvedO2"] = dissolvedO2 - ( - self.combined_nc[f"{sensor}_oxygen_mll"], - self.combined_nc[f"{sensor}_oxygen_umolkg"], - ) = self._calibrated_oxygen( - logs_dir, - sensor, - cf, - orig_nc, - "dissolvedO2", - temperature, - salinity, - "", - ) - except KeyError: - self.logger.debug("No dissolvedO2 data in %s", self.args.mission) - except ValueError as e: - cfg_file = Path( - MISSIONLOGS, - "".join(self.args.mission.split(".")[:2]), - self.args.mission, - self.sinfo["ctd"]["cal_filename"], - ) - self.logger.exception("Likely missing a calibration coefficient in %s", cfg_file) - self.logger.error(e) # noqa: TRY400 - self.logger.debug("Collecting dissolvedO2_port") - try: - dissolvedO2_port = xr.DataArray( - orig_nc["dissolvedO2_port"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="dissolvedO2_port", - ) - dissolvedO2_port.attrs = { - "long_name": "Dissolved Oxygen port side sensor", - "units": "Volts", - "comment": ("Analog Voltage Channel 3 - to be converted to umol/kg"), - } - self.combined_nc[f"{sensor}_dissolvedO2_port"] = dissolvedO2_port - ( - self.combined_nc[f"{sensor}_oxygen_mll_port"], - self.combined_nc[f"{sensor}_oxygen_umolkg_port"], - ) = self._calibrated_oxygen( - logs_dir, - sensor, - cf, - orig_nc, - "dissolvedO2_port", - temperature, - salinity, - "port", - ) - except KeyError: - self.logger.debug("No dissolvedO2_port data in %s", self.args.mission) - self.logger.debug("Collecting dissolvedO2_port") - try: - dissolvedO2_stbd = xr.DataArray( - orig_nc["dissolvedO2_stbd"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="dissolvedO2_stbd", - ) - dissolvedO2_stbd.attrs = { - "long_name": "Dissolved Oxygen stbd side sensor", - "units": "Volts", - "comment": ("Analog Voltage Channel 5 - to be converted to umol/kg"), - } - self.combined_nc[f"{sensor}_dissolvedO2_stbd"] = dissolvedO2_stbd - ( - self.combined_nc[f"{sensor}_oxygen_mll_stbd"], - self.combined_nc[f"{sensor}_oxygen_umolkg_stbd"], - ) = self._calibrated_oxygen( - logs_dir, - sensor, - cf, - orig_nc, - "dissolvedO2_stbd", - temperature, - salinity, - "stbd", - ) - except KeyError: - self.logger.debug("No dissolvedO2_port data in %s", self.args.mission) - - # === flow variables === - # A lot of 0.0 values in Dorado missions until about 2020.282.01 - self.logger.debug("Collecting flow1") - try: - flow1 = xr.DataArray( - orig_nc["flow1"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="flow1", - ) - flow1.attrs = { - "long_name": "Flow sensor on ctd1", - "units": "Volts", - "comment": f"flow1 from {source}", - } - self.combined_nc[f"{sensor}_flow1"] = flow1 - except KeyError: - self.logger.debug("No flow1 data in %s", self.args.mission) - self.logger.debug("Collecting flow2") - try: - flow2 = xr.DataArray( - orig_nc["flow2"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="flow2", - ) - flow2.attrs = { - "long_name": "Flow sensor on ctd1", - "units": "Volts", - "comment": f"flow2 from {source}", - } - self.combined_nc[f"{sensor}_flow2"] = flow2 - except KeyError: - self.logger.debug("No flow2 data in %s", self.args.mission) - - # === beam_transmittance variable from seabird25p on i2map vehicle === - try: - beam_transmittance, _ = _beam_transmittance_from_volts( - self.combined_nc, - orig_nc, - ) - beam_transmittance = xr.DataArray( - beam_transmittance * 100.0, - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="beam_transmittance", - ) - beam_transmittance.attrs = { - "long_name": "Beam Transmittance", - "units": "%", - "comment": ( - f"Calibrated Beam Transmittance from {source}'s transmissometer variable" - ), - } - self.combined_nc[f"{sensor}_beam_transmittance"] = beam_transmittance - - except KeyError: - self.logger.debug( - "No transmissometer data in %s/%s.nc", - self.args.mission, - sensor, - ) - - self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( - sensor, - orig_nc, - ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" - self.combined_nc[f"{sensor}_depth"].attrs = { - "long_name": "Depth", - "units": "m", - "comment": ( - f"Variable depth_filtdepth from {out_fn} linearly interpolated" - f" to {sensor}_time and corrected for pitch using" - f" {self.sinfo[sensor]['sensor_offset']}" - ), - } - - # === PAR variable from ctd2 on dorado vehicle === - try: - par = xr.DataArray( - orig_nc["par"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name="par", - ) - par.attrs = { - "long_name": "Photosynthetically Available Radiation", - "units": "Volts", - "comment": f"PAR from {source}'s par variable", - } - self.combined_nc[f"{sensor}_par"] = par - - except KeyError: - self.logger.debug("No par data in %s/%s.nc", self.args.mission, sensor) - - self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( - sensor, - orig_nc, - ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" - self.combined_nc[f"{sensor}_depth"].attrs = { - "long_name": "Depth", - "units": "m", - "comment": ( - f"Variable depth_filtdepth from {out_fn} linearly interpolated" - f" to {sensor}_time and corrected for pitch using" - f" {self.sinfo[sensor]['sensor_offset']}" - ), - } - - # === ad hoc Range checking === - self.logger.info( - "Performing range checking of %s in %s/%s.nc", vars_to_qc, self.args.mission, sensor - ) - self._range_qc_combined_nc( - instrument=sensor, - variables=vars_to_qc, - ranges={f"{sensor}_salinity": Range(30, 40)}, - set_to_nan=True, - ) - if self.args.mission == "2010.284.00": - self.logger.info( - "Removing points outside of time range for %s/%s.nc", self.args.mission, sensor - ) - self._range_qc_combined_nc( - instrument=sensor, - variables=[v for v in self.combined_nc.variables if v.startswith(sensor)], - ranges={ - f"{sensor}_time": Range( - pd.Timestamp(2010, 10, 11, 20, 0, 0), - pd.Timestamp(2010, 10, 12, 3, 28, 0), - ), - }, - ) - - def _tailcone_process(self, sensor): - # As requested by Rob Sherlock capture propRpm for comparison with - # mWaterSpeed from navigation.log - try: - orig_nc = getattr(self, sensor).orig_data - except FileNotFoundError as e: - self.logger.error("%s", e) # noqa: TRY400 - return - except AttributeError: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) from None - - # Remove non-monotonic times - self.logger.debug("Checking for non-monotonic increasing times") - try: - monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) - except IndexError: - error_message = "No data in tailcone.nc - likely empty tailcone.log file" - raise ValueError(error_message) from None - if (~monotonic).any(): - self.logger.debug( - "Removing non-monotonic increasing times at indices: %s", - np.argwhere(~monotonic).flatten(), - ) - orig_nc = orig_nc.sel(time=monotonic) - - source = self.sinfo[sensor]["data_filename"] - coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" - self.combined_nc["tailcone_propRpm"] = xr.DataArray( - orig_nc["propRpm"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_propRpm", - ) - self.combined_nc["tailcone_propRpm"].attrs = { - "long_name": "Vehicle propeller speed", - # Don't be confused by its name - propeller speed is logged in radians/sec. - "units": "rad/s", - "coordinates": coord_str, - "comment": f"propRpm from {source} (convert to RPM by multiplying by 9.549297)", - } - - def _ecopuck_process(self, sensor, cf): - # ecpouck's first mission 2020.245.00 - email dialog on 5 Dec 2022 discussing - # using it for developing an HS2 transfer function and comparison with LRAUV data - try: - orig_nc = getattr(self, sensor).orig_data - except FileNotFoundError as e: - self.logger.error("%s", e) # noqa: TRY400 - return - except AttributeError: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) from None - - # Remove non-monotonic times - self.logger.debug("Checking for non-monotonic increasing times") - monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) - if (~monotonic).any(): - self.logger.debug( - "Removing non-monotonic increasing times at indices: %s", - np.argwhere(~monotonic).flatten(), - ) - orig_nc = orig_nc.sel(time=monotonic) - - source = self.sinfo[sensor]["data_filename"] - coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" - beta_700 = cf.bbp700_scale_factor * (orig_nc["BB_Sig"].to_numpy() - cf.bbp700_dark_counts) - _, bbp = compute_backscatter(700, AVG_SALINITY, beta_700) # 33.6 - - self.combined_nc["ecopuck_bbp700"] = xr.DataArray( - bbp, - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_bbp700", - ) - self.combined_nc["ecopuck_bbp700"].attrs = { - "long_name": "Particulate backscattering coefficient at 700 nm", - "units": "m-1", - "coordinates": coord_str, - "comment": ( - f"BB_Sig from {source} converted to beta_700 using scale factor " - f"{cf.bbp700_scale_factor} and dark counts {cf.bbp700_dark_counts}, " - "then converted to bbp700 by the compute_backscatter() function." - ), - } - - self.combined_nc["ecopuck_cdom"] = xr.DataArray( - cf.cdom_scale_factor * (orig_nc["CDOM_Sig"].to_numpy() - cf.cdom_dark_counts), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_cdom", - ) - self.combined_nc["ecopuck_cdom"].attrs = { - "long_name": "Colored Dissolved Organic Matter", - "units": "ppb", - "coordinates": coord_str, - "comment": ( - f"CDOM_Sig from {source} converted to cdom using scale factor " - f"{cf.cdom_scale_factor} and dark counts {cf.cdom_dark_counts}" - ), - } - - self.combined_nc["ecopuck_chl"] = xr.DataArray( - cf.chl_scale_factor * (orig_nc["Chl_Sig"].to_numpy() - cf.chl_dark_counts), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_chl", - ) - - # From: FLBBCD2K-3695_(CHL)CharSheet.pdf - # The relationship between fluorescence and chlorophyll-a concentrations in-situ is - # highly variable. The scale factor listed on this document was determined using a - # mono-culture of phytoplankton (Thalassiosira weissflogii). The population was - # assumed to be reasonably healthy and the concentration was determined by using the - # absorption method. To accurately determine chlorophyll concentration using a - # fluorometer, you must perform secondary measurements on the populations of - # interest. This is typically done using extraction-based measurement techniques on - # discrete samples. For additional information on determining chlorophyll - # concentration see "Standard Methods for the Examination of Water and Wastewater" - # part 10200 H, published jointly by the American Public Health Association, - # American Water Works Association, and the Water Environment ,)deration. - self.combined_nc["ecopuck_chl"].attrs = { - "long_name": "Chlorophyll", - "units": "ug/l", - "coordinates": coord_str, - "comment": ( - f"Chl_Sig from {source} converted to chl using scale factor " - f"{cf.chl_scale_factor} and dark counts {cf.chl_dark_counts}" - ), - } - - def _apply_plumbing_lag( - self, - sensor: str, - time_index: pd.DatetimeIndex, - time_name: str, - ) -> tuple[xr.DataArray, str]: - """ - Apply plumbing lag to a time index in the combined netCDF file. - """ - # Convert lag_secs to milliseconds as np.timedelta64 neeeds an integer - lagged_time = time_index - np.timedelta64( - int(self.sinfo[sensor]["lag_secs"] * 1000), - "ms", - ) - # Need to update the sensor's time coordinate in the combined netCDF file - # so that DataArrays created with lagged_time fit onto the coordinate - self.combined_nc.coords[f"{sensor}_{time_name}"] = xr.DataArray( - lagged_time, - coords=[lagged_time], - dims={f"{sensor}_{time_name}"}, - name=f"{sensor}_{time_name}", - ) - lag_info = f"with plumbing lag correction of {self.sinfo[sensor]['lag_secs']} seconds" - return lagged_time, lag_info - - def _biolume_process(self, sensor): - try: - orig_nc = getattr(self, sensor).orig_data - except FileNotFoundError as e: - self.logger.error("%s", e) # noqa: TRY400 - return - except AttributeError: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) from None - - # Remove non-monotonic times - self.logger.debug("Checking for non-monotonic increasing time") - monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) - if (~monotonic).any(): - self.logger.debug( - "Removing non-monotonic increasing time at indices: %s", - np.argwhere(~monotonic).flatten(), - ) - orig_nc = orig_nc.sel({TIME: monotonic}) - - self.logger.info("Checking for non-monotonic increasing %s", TIME60HZ) - monotonic = monotonic_increasing_time_indices(orig_nc.get_index(TIME60HZ)) - if (~monotonic).any(): - self.logger.info( - "Removing non-monotonic increasing %s at indices: %s", - TIME60HZ, - np.argwhere(~monotonic).flatten(), - ) - orig_nc = orig_nc.sel({TIME60HZ: monotonic}) - - self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( - sensor, - orig_nc, - ) - - source = self.sinfo[sensor]["data_filename"] - self.combined_nc["biolume_flow"] = xr.DataArray( - orig_nc["flow"].to_numpy() * self.sinfo["biolume"]["flow_conversion"], - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_flow", + name=f"{sensor}_flow", ) self.combined_nc["biolume_flow"].attrs = { "long_name": "Bioluminesence pump flow rate", @@ -2952,159 +897,6 @@ def _biolume_process(self, sensor): set_to_nan=True, ) - def _lopc_process(self, sensor): - try: - orig_nc = getattr(self, sensor).orig_data - except FileNotFoundError as e: - self.logger.error("%s", e) # noqa: TRY400 - return - except AttributeError: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) from None - - source = self.sinfo[sensor]["data_filename"] - coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" - - # A lopc.nc file without a time variable will return a RangeIndex object - # from orig_nc.get_index('time') - test for presence of actual 'time' coordinate - if "time" not in orig_nc.coords: - error_message = ( - f"{sensor} has no time coordinate - likely an incomplete lopc.nc file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) - - self.combined_nc["lopc_countListSum"] = xr.DataArray( - orig_nc["countListSum"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_countListSum", - ) - self.combined_nc["lopc_countListSum"].attrs = { - "long_name": orig_nc["countListSum"].attrs["long_name"], - "units": orig_nc["countListSum"].attrs["units"], - "coordinates": coord_str, - "comment": f"Sum of countListSum values by size class from {source}", - } - - self.combined_nc["lopc_transCount"] = xr.DataArray( - orig_nc["transCount"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_transCount", - ) - self.combined_nc["lopc_transCount"].attrs = { - "long_name": orig_nc["transCount"].attrs["long_name"], - "units": orig_nc["transCount"].attrs["units"], - "coordinates": coord_str, - "comment": f"transCount from {source}", - } - - self.combined_nc["lopc_nonTransCount"] = xr.DataArray( - orig_nc["nonTransCount"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_nonTransCount", - ) - self.combined_nc["lopc_nonTransCount"].attrs = { - "long_name": orig_nc["nonTransCount"].attrs["long_name"], - "units": orig_nc["nonTransCount"].attrs["units"], - "coordinates": coord_str, - "comment": f"nonTransCount from {source}", - } - - self.combined_nc["lopc_LCcount"] = xr.DataArray( - orig_nc["LCcount"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_LCcount", - ) - self.combined_nc["lopc_LCcount"].attrs = { - "long_name": orig_nc["LCcount"].attrs["long_name"], - "units": orig_nc["LCcount"].attrs["units"], - "coordinates": coord_str, - "comment": f"LCcount from {source}", - } - - self.combined_nc["lopc_flowSpeed"] = xr.DataArray( - orig_nc["flowSpeed"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_flowSpeed", - ) - self.combined_nc["lopc_flowSpeed"].attrs = { - "long_name": orig_nc["flowSpeed"].attrs["long_name"], - "units": orig_nc["flowSpeed"].attrs["units"], - "coordinates": coord_str, - "comment": f"flowSpeed from {source}", - } - - def _isus_process(self, sensor): - try: - orig_nc = getattr(self, sensor).orig_data - except FileNotFoundError as e: - self.logger.error("%s", e) # noqa: TRY400 - return - except AttributeError: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) from None - - # Remove non-monotonic times - self.logger.debug("Checking for non-monotonic increasing times") - monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) - if (~monotonic).any(): - self.logger.debug( - "Removing non-monotonic increasing times at indices: %s", - np.argwhere(~monotonic).flatten(), - ) - orig_nc = orig_nc.sel(time=monotonic) - - source = self.sinfo[sensor]["data_filename"] - coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" - - self.combined_nc["isus_nitrate"] = xr.DataArray( - orig_nc["isusNitrate"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_nitrate", - ) - self.combined_nc["isus_nitrate"].attrs = { - "long_name": "Nitrate", - "units": "micromoles/liter", - "coordinates": coord_str, - "comment": f"isusNitrate from {source}", - } - self.combined_nc["isus_temp"] = xr.DataArray( - orig_nc["isusTemp"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_temp", - ) - self.combined_nc["isus_temp"].attrs = { - "long_name": "Temperature from ISUS", - "units": "Celsius", - "coordinates": coord_str, - "comment": f"isusTemp from {source}", - } - self.combined_nc["isus_quality"] = xr.DataArray( - orig_nc["isusQuality"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_quality", - ) - self.combined_nc["isus_quality"].attrs = { - "long_name": "Fit Residuals from ISUS", - "units": "", - "coordinates": coord_str, - "comment": f"isusQuality from {source}", - } - def _geometric_depth_correction(self, sensor, orig_nc): """Performs the align_geom() function from the legacy Matlab. Works for any sensor, but requires navigation being processed first @@ -3206,39 +998,6 @@ def _geometric_depth_correction(self, sensor, orig_nc): return corrected_depth - def _process(self, sensor, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 - coeffs = None - try: - coeffs = getattr(self, sensor).cals - except AttributeError as e: - self.logger.debug("No calibration information for %s: %s", sensor, e) - - if sensor == "navigation": - self._navigation_process(sensor) - elif sensor == "gps": - self._gps_process(sensor) - elif sensor == "depth": - self._depth_process(sensor) - elif sensor == "ecopuck": - self._ecopuck_process(sensor, coeffs) - elif sensor == "hs2": - self._hs2_process(sensor, logs_dir) - elif sensor == "tailcone": - self._tailcone_process(sensor) - elif sensor == "lopc": - self._lopc_process(sensor) - elif sensor == "isus": - self._isus_process(sensor) - elif sensor in ("ctd1", "ctd2", "seabird25p"): - if coeffs is not None: - self._ctd_process(logs_dir, sensor, coeffs) - elif hasattr(getattr(self, sensor), "orig_data"): - self.logger.warning("No calibration information for %s", sensor) - elif sensor == "biolume": - self._biolume_process(sensor) - elif hasattr(getattr(self, sensor), "orig_data"): - self.logger.warning("No method (yet) to process %s", sensor) - def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: name = name or self.args.mission vehicle = vehicle or self.args.auv_name @@ -3253,7 +1012,6 @@ def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: ", ".join(sorted(self.combined_nc.variables)), ) - def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = True) -> None: # noqa: FBT001, FBT002 name = name or self.args.mission vehicle = vehicle or self.args.auv_name logs_dir = Path(self.args.base_path, vehicle, MISSIONLOGS, name) @@ -3266,8 +1024,6 @@ def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = Tr self.combined_nc = xr.Dataset() for sensor in self.sinfo: - if not process_gps and sensor == "gps": - continue # to skip gps processing in conftest.py fixture getattr(self, sensor).cal_align_data = xr.Dataset() self.logger.debug("Processing %s %s %s", vehicle, name, sensor) try: @@ -3351,7 +1107,7 @@ def process_command_line(self): if __name__ == "__main__": - cal_netcdf = Calibrate_NetCDF() + cal_netcdf = Combine_NetCDF() cal_netcdf.process_command_line() cal_netcdf.calibration_dir = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" p_start = time.time() From 77da25122ac4b596e4dc7fa5cdea655f732f70a9 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 22 Oct 2025 16:22:35 -0700 Subject: [PATCH 025/121] Add netcdf4 dependency to the documentation. --- src/data/m1_soundspeed.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/data/m1_soundspeed.py b/src/data/m1_soundspeed.py index 6a5d8f39..ec089c47 100755 --- a/src/data/m1_soundspeed.py +++ b/src/data/m1_soundspeed.py @@ -84,6 +84,7 @@ some other package manager: gsw xarray + netcdf4 Installation: ------------- @@ -95,7 +96,7 @@ 3. Activate the virtual environment: source venv/bin/activate 4. Install the required packages: - pip install gsw xarray + pip install gsw xarray netcdf4 5. Save this script as m1_soundspeed.py 6. Run the script: python m1_soundspeed.py From dd0cc6cbdc39cce03dddda07a2b84d476722291d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 23 Oct 2025 10:34:05 -0700 Subject: [PATCH 026/121] Implement --filter_monotonic_time as a base level QC step. The coordiante variable (*time) for each data variable needs to be monotonically increasing. The data is unusable if this is not the case. Remove any data (the default behaviour) where time is not monotonically increasing --- src/data/nc42netcdfs.py | 324 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 317 insertions(+), 7 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 877a7adf..6411d3ce 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -276,26 +276,324 @@ def _get_available_variables( self.logger.debug(" Variables to extract: %s", vars_to_extract) return vars_to_extract + def _find_time_coordinate(self, src_group: netCDF4.Group) -> str: + """Find the time coordinate variable in a group using introspection. + + Returns: + str: Name of the time coordinate variable, or empty string if not found + """ + # Strategy 1: Look for variables with "time" in the name (most common) + time_vars = [var_name for var_name in src_group.variables if "time" in var_name.lower()] + if time_vars: + # Prefer variables that start with 'time' (like time_NAL9602) + time_vars.sort(key=lambda x: (not x.lower().startswith("time"), x)) + self.logger.debug("Found time coordinate %s via name pattern", time_vars[0]) + return time_vars[0] + + # Strategy 2: Look for variables with time-like units + for var_name, var in src_group.variables.items(): + if hasattr(var, "units"): + units = getattr(var, "units", "").lower() + time_patterns = ["seconds since", "days since", "hours since"] + if any(pattern in units for pattern in time_patterns): + self.logger.debug("Found time coordinate %s via units", var_name) + return var_name + + # Strategy 3: Look for unlimited dimension (backup) + for dim_name, dim in src_group.dimensions.items(): + if dim.isunlimited() and dim_name in src_group.variables: + self.logger.debug("Found time coordinate %s via unlimited dimension", dim_name) + return dim_name + + self.logger.debug("No time coordinate found in group") + return "" + + def _get_time_filters_for_variables( + self, src_group: netCDF4.Group, vars_to_extract: list[str] + ) -> dict[str, dict]: + """Get time filtering information for time coordinates used by vars_to_extract. + + Returns: + dict: Map of time_coord_name -> {"indices": list[int], "filtered": bool} + """ + time_filters = {} + + # Check if time filtering is enabled + if not getattr(self.args, "filter_monotonic_time", True): + return time_filters + + # Find all time coordinates used by variables in extraction list + time_coords_found = set() + for var_name in vars_to_extract: + if var_name in src_group.variables: + var = src_group.variables[var_name] + + # Check each dimension to see if it's a time coordinate + for dim_name in var.dimensions: + if dim_name in src_group.variables: + dim_var = src_group.variables[dim_name] + + # Check if this dimension variable is a time coordinate + if self._is_time_variable(dim_name, dim_var): + time_coords_found.add(dim_name) + + # Now process each unique time coordinate found + for time_coord_name in time_coords_found: + time_var = src_group.variables[time_coord_name] + time_data = time_var[:] + mono_indices = self._get_monotonic_indices(time_data) + + # Check if filtering was actually needed + filtered = len(mono_indices) < len(time_data) + if filtered: + self.logger.info( + "Time coordinate %s: filtered %d non-monotonic points (%d -> %d)", + time_coord_name, + len(time_data) - len(mono_indices), + len(time_data), + len(mono_indices), + ) + + time_filters[time_coord_name] = {"indices": mono_indices, "filtered": filtered} + + return time_filters + + def _is_time_variable(self, var_name: str, var) -> bool: + """Check if a variable is a time coordinate variable.""" + # Check name pattern + if "time" in var_name.lower(): + return True + + # Check units + if hasattr(var, "units"): + units = getattr(var, "units", "").lower() + time_patterns = ["seconds since", "days since", "hours since"] + if any(pattern in units for pattern in time_patterns): + return True + + return False + + def _get_monotonic_indices(self, time_data) -> list[int]: + """Get indices for monotonic time values from time data array.""" + mono_indices = [] + if len(time_data) > 0: + mono_indices.append(0) # Always include first point + + for i in range(1, len(time_data)): + if time_data[i] > time_data[mono_indices[-1]]: + mono_indices.append(i) + + return mono_indices + + def _get_monotonic_time_indices(self, src_group: netCDF4.Group) -> tuple[list[int], bool]: + """Get indices for monotonically increasing time data. + + Returns: + list[int]: List of indices for monotonic time points + bool: True if filtering was applied + """ + # Check if time filtering is enabled + if not getattr(self.args, "filter_monotonic_time", True): + return [], False + + # Find the time coordinate variable using introspection + time_var_name = self._find_time_coordinate(src_group) + if not time_var_name: + # No time variable found, return all data + return [], False + + time_var = src_group.variables[time_var_name] + time_data = time_var[:] + + # Find monotonically increasing indices + mono_indices = [] + if len(time_data) > 0: + mono_indices.append(0) # Always include first point + + for i in range(1, len(time_data)): + if time_data[i] > time_data[mono_indices[-1]]: + mono_indices.append(i) + else: + self.logger.debug( + "Non-monotonic time value at index %d: %s <= %s (var: %s)", + i, + time_data[i], + time_data[mono_indices[-1]], + time_var_name, + ) + + total_points = len(time_data) + filtered_points = len(mono_indices) + + if filtered_points < total_points: + self.logger.warning( + "Filtered %d non-monotonic time points (kept %d/%d) for variable %s", + total_points - filtered_points, + filtered_points, + total_points, + time_var_name, + ) + return mono_indices, True + + return mono_indices, False + + def _copy_variable_with_appropriate_time_filter( + self, + src_group: netCDF4.Group, + dst_dataset: netCDF4.Dataset, + var_name: str, + time_filters: dict[str, dict], + ): + """Copy a variable with appropriate time filtering applied.""" + try: + src_var = src_group.variables[var_name] + + # Create variable in destination + dst_var = dst_dataset.createVariable( + var_name, + src_var.dtype, + src_var.dimensions, + zlib=True, + complevel=6, + shuffle=True, + fletcher32=True, + ) + + # Check if this variable itself is a time coordinate that needs filtering + if var_name in time_filters and time_filters[var_name]["filtered"]: + # This is a time coordinate variable that needs filtering + time_indices = time_filters[var_name]["indices"] + dst_var[:] = src_var[:][time_indices] + self.logger.debug("Applied time filtering to time coordinate %s", var_name) + + # Check if this variable depends on any filtered time dimensions + elif src_var.dimensions: + # Find which (if any) of this variable's dimensions are filtered time coordinates + filtered_dims = {} + for dim_name in src_var.dimensions: + if dim_name in time_filters and time_filters[dim_name]["filtered"]: + filtered_dims[dim_name] = time_filters[dim_name]["indices"] + + if filtered_dims: + # Apply filtering for the appropriate dimensions + self._apply_multidimensional_time_filter( + src_var, dst_var, var_name, filtered_dims + ) + else: + # No time filtering needed + dst_var[:] = src_var[:] + else: + # Scalar variable or no dimensions + dst_var[:] = src_var[:] + + # Copy attributes + for attr_name in src_var.ncattrs(): + dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + + self.logger.debug(" Copied variable: %s", var_name) + + except Exception as e: # noqa: BLE001 + self.logger.warning("Failed to copy variable %s: %s", var_name, e) + + def _apply_multidimensional_time_filter( + self, src_var, dst_var, var_name: str, filtered_dims: dict[str, list[int]] + ): + """Apply time filtering to a multi-dimensional variable.""" + # For now, handle the common case where time is the first dimension + if len(filtered_dims) == 1: + dim_name = list(filtered_dims.keys())[0] + time_indices = filtered_dims[dim_name] + + if src_var.dimensions[0] == dim_name: + # Time is first dimension + if len(src_var.dimensions) == 1: + # 1D variable + dst_var[:] = src_var[:][time_indices] + else: + # Multi-dimensional with time as first dimension + dst_var[:] = src_var[:][time_indices, ...] + self.logger.debug( + "Applied time filtering to variable %s (dim: %s)", var_name, dim_name + ) + else: + # Time dimension is not first - more complex indexing needed + self.logger.warning( + "Variable %s has filtered time dimension %s but not as first dimension - " + "copying all data", + var_name, + dim_name, + ) + dst_var[:] = src_var[:] + else: + # Multiple time dimensions filtered - complex case + self.logger.warning( + "Variable %s has multiple filtered time dimensions - copying all data", var_name + ) + dst_var[:] = src_var[:] + + def _create_dimensions_with_time_filters( + self, + src_group: netCDF4.Group, + dst_dataset: netCDF4.Dataset, + dims_needed: set[str], + time_filters: dict[str, dict], + ): + """Create dimensions in the destination dataset, adjusting time dimensions if filtered.""" + for dim_name in dims_needed: + if dim_name in src_group.dimensions: + src_dim = src_group.dimensions[dim_name] + + # Check if this dimension corresponds to a filtered time variable + if dim_name in time_filters and time_filters[dim_name]["filtered"]: + # Use the number of filtered time points + filtered_size = len(time_filters[dim_name]["indices"]) + size = filtered_size if not src_dim.isunlimited() else None + self.logger.debug( + "Created filtered time dimension %s: %s -> %s", + dim_name, + len(src_dim), + size or filtered_size, + ) + else: + size = len(src_dim) if not src_dim.isunlimited() else None + + dst_dataset.createDimension(dim_name, size) + def _create_netcdf_file( self, src_group: netCDF4.Group, vars_to_extract: list[str], output_file: Path ): - """Create a new NetCDF file with the specified variables.""" + """Create a new NetCDF file with the specified variables and monotonic time.""" + # Get time filtering information for each time variable + time_filters = self._get_time_filters_for_variables(src_group, vars_to_extract) + with netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: # Copy global attributes self._copy_global_attributes(src_group, dst_dataset) - # Create dimensions + # Add note about time filtering if applied + if any(tf["filtered"] for tf in time_filters.values()): + dst_dataset.setncattr( + "processing_note", "Non-monotonic time values filtered out during extraction" + ) + + # Create dimensions - may need to adjust time dimension sizes dims_needed = self._get_required_dimensions(src_group, vars_to_extract) - self._create_dimensions(src_group, dst_dataset, dims_needed) + self._create_dimensions_with_time_filters( + src_group, dst_dataset, dims_needed, time_filters + ) - # Copy coordinate variables + # Copy coordinate variables with time filtering coord_vars = self._get_coordinate_variables(src_group, dims_needed, vars_to_extract) for var_name in coord_vars: - self._copy_variable(src_group, dst_dataset, var_name) + self._copy_variable_with_appropriate_time_filter( + src_group, dst_dataset, var_name, time_filters + ) - # Copy requested variables + # Copy requested variables with time filtering for var_name in vars_to_extract: - self._copy_variable(src_group, dst_dataset, var_name) + self._copy_variable_with_appropriate_time_filter( + src_group, dst_dataset, var_name, time_filters + ) def _copy_global_attributes(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset): """Copy global attributes from source to destination.""" @@ -438,6 +736,18 @@ def process_command_line(self): action="store_true", help="Use with --noinput to not re-process existing downloaded log files", ) + parser.add_argument( + "--filter_monotonic_time", + action="store_true", + default=True, + help="Filter out non-monotonic time values (default: True)", + ) + parser.add_argument( + "--no_filter_monotonic_time", + dest="filter_monotonic_time", + action="store_false", + help="Keep all time values, including non-monotonic ones", + ) parser.add_argument( "--start", action="store", From e135177e96e7e5f8d8dae8f41469335ff3003c38 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 28 Oct 2025 08:24:18 -0700 Subject: [PATCH 027/121] Resolution of Issue writen by Claude Sonnet 4 --- ...THUB_ISSUE_6_NC42NETCDFS_IMPLEMENTATION.md | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 src/data/GITHUB_ISSUE_6_NC42NETCDFS_IMPLEMENTATION.md diff --git a/src/data/GITHUB_ISSUE_6_NC42NETCDFS_IMPLEMENTATION.md b/src/data/GITHUB_ISSUE_6_NC42NETCDFS_IMPLEMENTATION.md new file mode 100644 index 00000000..c855ea05 --- /dev/null +++ b/src/data/GITHUB_ISSUE_6_NC42NETCDFS_IMPLEMENTATION.md @@ -0,0 +1,76 @@ +# GitHub Issue #6 Implementation Summary - CORRECTED VERSION + +## Problem +LRAUV NetCDF files sometimes contain non-monotonic time data, which breaks downstream processing tools that expect monotonic time coordinates. **The critical issue was that each NetCDF group contains multiple independent time variables (e.g., `time_NAL9602`, `time_CTD_NeilBrown`) that each need their own monotonic filtering.** + +## Solution Implemented +Complete rewrite of time filtering to handle **multiple independent time variables per group** with the following architecture: + +### 1. Per-Variable Time Detection and Filtering +- **`_get_time_filters_for_variables()`**: Identifies ALL time variables in the extraction list and computes monotonic filtering for each independently +- **`_is_time_variable()`**: Determines if a variable is a time coordinate using name patterns and units +- **`_get_monotonic_indices()`**: Computes monotonic indices for any time data array + +### 2. Multi-Variable Time Processing +- **`_copy_variable_with_appropriate_time_filter()`**: Applies the correct time filtering based on the specific variable: + - If the variable IS a time coordinate: applies its own monotonic filtering + - If the variable DEPENDS on time coordinates: uses the appropriate time dimension's filtering + - If no time dependencies: copies all data unchanged +- **`_create_dimensions_with_time_filters()`**: Adjusts dimension sizes for each filtered time coordinate +- **`_apply_multidimensional_time_filter()`**: Handles complex multi-dimensional filtering + +### 3. Independent Time Coordinate Processing +Each time variable (like `time_NAL9602`, `time_CTD_NeilBrown`) gets: +- Its own monotonic analysis +- Its own filtered indices +- Its own dimension size adjustment +- Independent logging of filtering results + +### 4. Command Line Control (Unchanged) +- **`--filter_monotonic_time`**: Enable time filtering (default behavior) +- **`--no_filter_monotonic_time`**: Disable filtering to preserve all time values + +## Key Methods - CORRECTED ARCHITECTURE + +```python +def _get_time_filters_for_variables(self, src_group, vars_to_extract) -> dict[str, dict]: + """Get time filtering info for EACH time variable in the extraction list. + Returns: {time_var_name: {"indices": list[int], "filtered": bool}}""" + +def _is_time_variable(self, var_name: str, var) -> bool: + """Check if a variable is a time coordinate variable.""" + +def _get_monotonic_indices(self, time_data) -> list[int]: + """Get monotonic indices for any time data array.""" + +def _copy_variable_with_appropriate_time_filter(self, src_group, dst_dataset, var_name, time_filters): + """Copy variable with the APPROPRIATE time filtering for that specific variable.""" + +def _create_dimensions_with_time_filters(self, src_group, dst_dataset, dims_needed, time_filters): + """Create dimensions with MULTIPLE time coordinate filtering.""" + +def _apply_multidimensional_time_filter(self, src_var, dst_var, var_name, filtered_dims): + """Apply time filtering to multi-dimensional variables.""" +``` + +## Testing - CORRECTED VALIDATION +- ✅ Created test with multiple time variables in single group (`time_NAL9602`, `time_CTD_NeilBrown`) +- ✅ Verified independent filtering: `time_NAL9602` (10→8 points), `time_CTD_NeilBrown` (8→6 points) +- ✅ Confirmed each time variable gets its own monotonic indices +- ✅ Validated that data variables use appropriate time coordinate filtering + +## Root Cause Fix +**Previous implementation incorrectly assumed ONE time coordinate per group.** The corrected implementation recognizes that: + +1. **Each group can have multiple time variables** (`time_NAL9602`, `time_CTD_NeilBrown`, etc.) +2. **Each time variable needs independent monotonic filtering** +3. **Data variables must use the filtering from their specific time coordinate** +4. **Different time coordinates can have different amounts of filtering** + +## Backward Compatibility +- Default behavior enables time filtering for safer processing +- Users can disable filtering with `--no_filter_monotonic_time` if needed +- No breaking changes to existing API +- Works with single time coordinate groups (backward compatible) AND multiple time coordinate groups (new functionality) + +This corrected implementation properly addresses GitHub issue #6 by handling the real-world complexity of LRAUV NetCDF files with multiple independent time coordinates per group. \ No newline at end of file From c2ef3086c3620c05b12f9b68e5f69f1842fe370c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 28 Oct 2025 08:25:11 -0700 Subject: [PATCH 028/121] Add "2.2 - combine.py" --- .vscode/launch.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.vscode/launch.json b/.vscode/launch.json index 73a3045b..f9b60a3e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -103,6 +103,16 @@ "program": "${workspaceFolder}/src/data/hs2_proc.py", "console": "integratedTerminal", }, + + { + "name": "2.2 - combine.py", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/combine.py", + "console": "integratedTerminal", + "justMyCode": false, + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + }, { "name": "3.0 - align.py", "type": "debugpy", From b5dd22f94194877ca98ab60a37ed54d2af32940d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 28 Oct 2025 08:58:33 -0700 Subject: [PATCH 029/121] WIP on combine.py. Add GROUP literal for globbing of Group .nc files. --- src/data/archive.py | 4 +- src/data/combine.py | 561 ++++------------------------------------ src/data/nc42netcdfs.py | 12 +- 3 files changed, 53 insertions(+), 524 deletions(-) diff --git a/src/data/archive.py b/src/data/archive.py index 2bf8aa37..2352d5bf 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -19,7 +19,7 @@ from create_products import MISSIONIMAGES, MISSIONODVS from logs2netcdfs import BASE_PATH, LOG_FILES, MISSIONNETCDFS, AUV_NetCDF -from nc42netcdfs import BASE_LRAUV_PATH +from nc42netcdfs import BASE_LRAUV_PATH, GROUP from resample import FREQ LOG_NAME = "processing.log" @@ -182,7 +182,7 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: self.logger.exception("%s not found", dst_dir) self.logger.info("Is %s mounted?", self.mount_dir) sys.exit(1) - for src_file in sorted(src_dir.glob(f"{Path(log_file).stem}_Group_*.nc")): + for src_file in sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")): dst_file = Path(dst_dir, src_file.name) if self.args.clobber: if dst_file.exists(): diff --git a/src/data/combine.py b/src/data/combine.py index bfa0e1bf..f39a1bd0 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -35,11 +35,10 @@ import argparse # noqa: I001 import logging -import shutil import sys import time from argparse import RawTextHelpFormatter -from datetime import UTC, datetime +from datetime import UTC from pathlib import Path from socket import gethostname from typing import NamedTuple @@ -50,9 +49,9 @@ from scipy.interpolate import interp1d import pandas as pd -import pyproj from AUV import monotonic_increasing_time_indices, nudge_positions -from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ, AUV_NetCDF +from logs2netcdfs import AUV_NetCDF, TIME, TIME60HZ +from nc42netcdfs import BASE_LRAUV_PATH, GROUP AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay @@ -189,14 +188,14 @@ def global_metadata(self): metadata["summary"] = ( "Observational oceanographic data obtained from an Autonomous" " Underwater Vehicle mission with measurements at" - " original sampling intervals. The data have been calibrated" + " original sampling intervals. The data have been processed" " by MBARI's auv-python software." ) if self.summary_fields: # Should be just one item in set, but just in case join them metadata["summary"] += " " + ". ".join(self.summary_fields) metadata["comment"] = ( - f"MBARI Dorado-class AUV data produced from original data" + f"MBARI Long Range AUV data produced from original data" f" with execution of '{self.commandline}'' at {iso_now} on" f" host {gethostname()}. Software available at" f" 'https://github.com/mbari-org/auv-python'" @@ -286,304 +285,6 @@ def _range_qc_combined_nc( # noqa: C901, PLR0912 self.combined_nc = self.combined_nc.drop_vars(qced_vars) self.logger.info("Done range checking %s", instrument) - def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 - # AUV navigation data, which comes from a process on the vehicle that - # integrates data from several instruments. We use it to grab the DVL - # data to help determine vehicle position when it is below the surface. - # - # Nav.depth is used to compute pressure for salinity and oxygen computations - # Nav.latitude and Nav.longitude converted to degrees were added to - # the log file at end of 2004 - # Nav.roll, Nav.pitch, Nav.yaw, Nav.Xpos and Nav.Ypos are extracted for - # 3-D mission visualization - try: - orig_nc = getattr(self, sensor).orig_data - except FileNotFoundError as e: - self.logger.error("%s", e) # noqa: TRY400 - return - except AttributeError: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) from None - - # Remove non-monotonic times - self.logger.debug("Checking for non-monotonic increasing times") - monotonic = monotonic_increasing_time_indices(orig_nc.get_index("time")) - if (~monotonic).any(): - self.logger.debug( - "Removing non-monotonic increasing times at indices: %s", - np.argwhere(~monotonic).flatten(), - ) - orig_nc = orig_nc.sel(time=monotonic) - - source = self.sinfo[sensor]["data_filename"] - coord_str = f"{sensor}_time {sensor}_depth {sensor}_latitude {sensor}_longitude" - vars_to_qc = [] - # Units of these angles are radians in the original files, we want degrees - vars_to_qc.append("navigation_roll") - self.combined_nc["navigation_roll"] = xr.DataArray( - orig_nc["mPhi"].to_numpy() * 180 / np.pi, - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_roll", - ) - self.combined_nc["navigation_roll"].attrs = { - "long_name": "Vehicle roll", - "standard_name": "platform_roll_angle", - "units": "degree", - "coordinates": coord_str, - "comment": f"mPhi from {source}", - } - - vars_to_qc.append("navigation_pitch") - self.combined_nc["navigation_pitch"] = xr.DataArray( - orig_nc["mTheta"].to_numpy() * 180 / np.pi, - coords=[orig_nc.get_index("time")], - dims={"navigation_time"}, - name="pitch", - ) - self.combined_nc["navigation_pitch"].attrs = { - "long_name": "Vehicle pitch", - "standard_name": "platform_pitch_angle", - "units": "degree", - "coordinates": coord_str, - "comment": f"mTheta from {source}", - } - - vars_to_qc.append("navigation_yaw") - self.combined_nc["navigation_yaw"] = xr.DataArray( - orig_nc["mPsi"].to_numpy() * 180 / np.pi, - coords=[orig_nc.get_index("time")], - dims={"navigation_time"}, - name="yaw", - ) - self.combined_nc["navigation_yaw"].attrs = { - "long_name": "Vehicle yaw", - "standard_name": "platform_yaw_angle", - "units": "degree", - "coordinates": coord_str, - "comment": f"mPsi from {source}", - } - - self.combined_nc["navigation_posx"] = xr.DataArray( - orig_nc["mPos_x"].to_numpy() - orig_nc["mPos_x"].to_numpy()[0], - coords=[orig_nc.get_index("time")], - dims={"navigation_time"}, - name="posx", - ) - self.combined_nc["navigation_posx"].attrs = { - "long_name": "Relative lateral easting", - "units": "m", - "coordinates": coord_str, - "comment": f"mPos_x (minus first position) from {source}", - } - - self.combined_nc["navigation_posy"] = xr.DataArray( - orig_nc["mPos_y"].to_numpy() - orig_nc["mPos_y"].to_numpy()[0], - coords=[orig_nc.get_index("time")], - dims={"navigation_time"}, - name="posy", - ) - self.combined_nc["navigation_posy"].attrs = { - "long_name": "Relative lateral northing", - "units": "m", - "coordinates": coord_str, - "comment": f"mPos_y (minus first position) from {source}", - } - - vars_to_qc.append("navigation_depth") - self.combined_nc["navigation_depth"] = xr.DataArray( - orig_nc["mDepth"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={"navigation_time"}, - name="navigation_depth", - ) - self.combined_nc["navigation_depth"].attrs = { - "long_name": "Depth from Nav", - "standard_name": "depth", - "units": "m", - "comment": f"mDepth from {source}", - } - - self.combined_nc["navigation_mWaterSpeed"] = xr.DataArray( - orig_nc["mWaterSpeed"].to_numpy(), - coords=[orig_nc.get_index("time")], - dims={"navigation_time"}, - name="navigation_mWaterSpeed", - ) - self.combined_nc["navigation_mWaterSpeed"].attrs = { - "long_name": "Current speed based upon DVL data", - "standard_name": "platform_speed_wrt_sea_water", - "units": "m/s", - "comment": f"mWaterSpeed from {source}", - } - - if "latitude" in orig_nc: - navlat_var = "latitude" - elif "latitudeNav" in orig_nc: - # Starting with 2022.243.00 the latitude variable name was changed - navlat_var = "latitudeNav" - else: - navlat_var = None # noqa: F841 - self.logger.debug( - "Likely before 2004.167.04 when latitude was added to navigation.log", - ) - - navlons = None - navlats = None - if "longitude" in orig_nc: - # starting with 2004.167.04 latitude & longitude were added to navigation.log - navlons = orig_nc["longitude"].to_numpy() - navlats = orig_nc["latitude"].to_numpy() - elif "longitudeNav" in orig_nc: - # Starting with 2022.243.00 the longitude variable name was changed - navlons = orig_nc["longitudeNav"].to_numpy() - navlats = orig_nc["latitudeNav"].to_numpy() - else: - # Up through 2004.112.02 we converted from Easting/Northing to lat/lon - # - all missions in Monterey Bay (Zone 10) - self.logger.info( - "Converting from Easting/Northing to lat/lon for mission %s", - self.args.mission, - ) - proj = pyproj.Proj(proj="utm", zone=10, ellps="WGS84", radians=False) - navlons, navlats = proj( - orig_nc["mPos_y"].to_numpy(), - orig_nc["mPos_x"].to_numpy(), - inverse=True, - ) - navlons = navlons * np.pi / 180.0 - navlats = navlats * np.pi / 180.0 - - if navlons.any() and navlats.any(): - vars_to_qc.append("navigation_latitude") - self.combined_nc["navigation_latitude"] = xr.DataArray( - navlats * 180 / np.pi, - coords=[orig_nc.get_index("time")], - dims={"navigation_time"}, - name="latitude", - ) - self.combined_nc["navigation_latitude"].attrs = { - "long_name": "latitude", - "standard_name": "latitude", - "units": "degrees_north", - "comment": f"latitude (converted from radians) from {source}", - } - vars_to_qc.append("navigation_longitude") - self.combined_nc["navigation_longitude"] = xr.DataArray( - navlons * 180 / np.pi, - coords=[orig_nc.get_index("time")], - dims={"navigation_time"}, - name="longitude", - ) - # Setting standard_name attribute here once sets it for all variables - self.combined_nc["navigation_longitude"].coords[f"{sensor}_time"].attrs = { - "standard_name": "time", - } - self.combined_nc["navigation_longitude"].attrs = { - "long_name": "longitude", - "standard_name": "longitude", - "units": "degrees_east", - "comment": f"longitude (converted from radians) from {source}", - } - else: - # Setting standard_name attribute here once sets it for all variables - self.combined_nc["navigation_depth"].coords[f"{sensor}_time"].attrs = { - "standard_name": "time", - } - - # % Remove obvious outliers that later disrupt the section plots. - # % (First seen on mission 2008.281.03) - # % In case we ever use this software for the D Allan B mapping vehicle determine - # % the good depth range from the median of the depths - # % From mission 2011.250.11 we need to first eliminate the near surface values - # % before taking the median. - # pdIndx = find(Nav.depth > 1); - # posDepths = Nav.depth(pdIndx); - pos_depths = np.where(self.combined_nc["navigation_depth"].to_numpy() > 1) - if self.args.mission in {"2013.301.02", "2009.111.00"}: - self.logger.info("Bypassing Nav QC depth check") - maxGoodDepth = 1250 - else: - if pos_depths[0].size == 0: - self.logger.warning( - "No positive depths found in %s/navigation.nc", - self.args.mission, - ) - maxGoodDepth = 1250 - else: - maxGoodDepth = 7 * np.median(pos_depths) - self.logger.debug("median of positive valued depths = %s", np.median(pos_depths)) - if maxGoodDepth < 0: - maxGoodDepth = 100 # Fudge for the 2009.272.00 mission where median was -0.1347! - if self.args.mission == "2010.153.01": - maxGoodDepth = 1250 # Fudge for 2010.153.01 where the depth was bogus, about 1.3 - - self.logger.debug("Finding depths less than '%s' and times > 0'", maxGoodDepth) - - if self.args.mission == "2010.172.01": - self.logger.info( - "Performing special QC for %s/navigation.nc", - self.args.mission, - ) - self._range_qc_combined_nc( - instrument="navigation", - variables=vars_to_qc, - ranges={ - "navigation_depth": Range(0, 1000), - "navigation_roll": Range(-180, 180), - "navigation_pitch": Range(-180, 180), - "navigation_yaw": Range(-360, 360), - "navigation_longitude": Range(-360, 360), - "navigation_latitude": Range(-90, 90), - }, - ) - - missions_to_check = { - "2004.345.00", - "2005.240.00", - "2007.134.09", - "2010.293.00", - "2011.116.00", - "2013.227.00", - "2016.348.00", - "2017.121.00", - "2017.269.01", - "2017.297.00", - "2017.347.00", - "2017.304.00", - "2011.166.00", - } - if self.args.mission in missions_to_check: - self.logger.info( - "Removing points outside of Monterey Bay for %s/navigation.nc", self.args.mission - ) - self._range_qc_combined_nc( - instrument="navigation", - variables=vars_to_qc, - ranges={ - "navigation_longitude": Range(-122.1, -121.7), - "navigation_latitude": Range(36, 37), - }, - ) - if self.args.mission == "2010.284.00": - self.logger.info( - "Removing points outside of time range for %s/navigation.nc", - self.args.mission, - ) - self._range_qc_combined_nc( - instrument="navigation", - variables=[v for v in self.combined_nc.variables if v.startswith(sensor)], - ranges={ - f"{sensor}_time": Range( - pd.Timestamp(2010, 10, 11, 20, 0, 0), - pd.Timestamp(2010, 10, 12, 3, 28, 0), - ), - }, - ) - def _nudge_pos(self, max_sec_diff_at_end=10): """Apply linear nudges to underwater latitudes and longitudes so that they match the surface gps positions. @@ -616,151 +317,6 @@ def _nudge_pos(self, max_sec_diff_at_end=10): return lon_nudged, lat_nudged - def _gps_process(self, sensor): - try: - orig_nc = getattr(self, sensor).orig_data - except FileNotFoundError as e: - self.logger.exception("%s", e) # noqa: TRY401 - return - except AttributeError: - if self.args.mission == "2010.151.04": - # Gulf of Mexico mission - use data from usbl.dat file(s) - usbl_file = Path( - self.args.base_path, - self.args.auv_name, - MISSIONNETCDFS, - self.args.mission, - "usbl.nc", - ) - if not usbl_file.exists(): - # Copy from archive AUVCTD/missionnetcdfs/YYYY/YYYYJJJ the usbl.nc file - from archive import AUVCTD_VOL - - year = self.args.mission.split(".")[0] - YYYYJJJ = "".join(self.args.mission.split(".")[:2]) - missionnetcdfs_dir = Path( - AUVCTD_VOL, - MISSIONNETCDFS, - year, - YYYYJJJ, - self.args.mission, - ) - shutil.copyfile( - Path(missionnetcdfs_dir, "usbl.nc"), - usbl_file, - ) - self.logger.info( - "Just for the GoMx mission 2010.151.04 use data from %s " - "that came from the missionlogs/usbl.dat file", - usbl_file, - ) - orig_nc = xr.open_dataset(usbl_file) - - # Subsample usbl so that it has similar frequency to gps data - # and convert to radians so that it matches the gps data - orig_nc = orig_nc.isel(time=slice(None, None, 10)) - orig_nc["latitude"] = orig_nc["latitude"] * np.pi / 180.0 - orig_nc["longitude"] = orig_nc["longitude"] * np.pi / 180.0 - else: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) - raise EOFError(error_message) from None - - lat = orig_nc["latitude"] * 180.0 / np.pi - if not lat.any(): - error_message = f"No latitude data found in {sensor}.log" - raise ValueError(error_message) - if orig_nc["longitude"][0] > 0: - lon = -1 * orig_nc["longitude"] * 180.0 / np.pi - else: - lon = orig_nc["longitude"] * 180.0 / np.pi - - gps_time_to_save = orig_nc.get_index("time") - lat_to_save = lat - lon_to_save = lon - - source = self.sinfo[sensor]["data_filename"] - vars_to_qc = [] - vars_to_qc.append("gps_latitude") - self.combined_nc["gps_latitude"] = xr.DataArray( - lat_to_save.to_numpy(), - coords=[gps_time_to_save], - dims={"gps_time"}, - name="gps_latitude", - ) - self.combined_nc["gps_latitude"].attrs = { - "long_name": "GPS Latitude", - "standard_name": "latitude", - "units": "degrees_north", - "comment": f"latitude from {source}", - } - - vars_to_qc.append("gps_longitude") - self.combined_nc["gps_longitude"] = xr.DataArray( - lon_to_save.to_numpy(), - coords=[gps_time_to_save], - dims={"gps_time"}, - name="gps_longitude", - ) - # Setting standard_name attribute here once sets it for all variables - self.combined_nc["gps_longitude"].coords[f"{sensor}_time"].attrs = { - "standard_name": "time", - } - self.combined_nc["gps_longitude"].attrs = { - "long_name": "GPS Longitude", - "standard_name": "longitude", - "units": "degrees_east", - "comment": f"longitude from {source}", - } - if self.args.mission in { - "2004.345.00", - "2005.240.00", - "2007.134.09", - "2010.293.00", - "2011.116.00", - "2013.227.00", - "2016.348.00", - "2017.121.00", - "2017.269.01", - "2017.297.00", - "2017.347.00", - "2017.304.00", - "2011.166.00", - }: - self.logger.info( - "Removing points outside of Monterey Bay for %s/gps.nc", self.args.mission - ) - self._range_qc_combined_nc( - instrument="gps", - variables=vars_to_qc, - ranges={ - "gps_latitude": Range(36, 37), - "gps_longitude": Range(-122.1, -121.7), - }, - ) - - # TODO: Put this in a separate module like match_to_gps.py or something - # With navigation dead reckoned positions available in self.combined_nc - # and the gps positions added we can now match the underwater inertial - # (dead reckoned) positions to the surface gps positions. - nudged_longitude, nudged_latitude = self._nudge_pos() - self.combined_nc["nudged_latitude"] = nudged_latitude - self.combined_nc["nudged_latitude"].attrs = { - "long_name": "Nudged Latitude", - "standard_name": "latitude", - "units": "degrees_north", - "comment": "Dead reckoned latitude nudged to GPS positions", - } - self.combined_nc["nudged_longitude"] = nudged_longitude - self.combined_nc["nudged_longitude"].attrs = { - "long_name": "Nudged Longitude", - "standard_name": "longitude", - "units": "degrees_east", - "comment": "Dead reckoned longitude nudged to GPS positions", - } - def _apply_plumbing_lag( self, sensor: str, @@ -793,10 +349,7 @@ def _biolume_process(self, sensor): self.logger.error("%s", e) # noqa: TRY400 return except AttributeError: - error_message = ( - f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" - ) + error_message = f"{sensor} has no orig_data - likely a missing or zero-sized .log file" raise EOFError(error_message) from None # Remove non-monotonic times @@ -998,12 +551,22 @@ def _geometric_depth_correction(self, sensor, orig_nc): return corrected_depth - def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + def combine_groups(self): + log_file = self.args.log_file + src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) + self.combined_nc = xr.Dataset() + for group_file in group_files: + self.logger.info("Found group file: %s", group_file) + # Make nudged_longitude, nudged_latitude = self._nudge_pos() call on when appropriate + + def write_netcdf(self) -> None: + log_file = self.args.log_file + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + out_fn = Path(netcdfs_dir, f"{self.args.log_file.stem}_cal.nc") + self.combined_nc.attrs = self.global_metadata() - out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_cal.nc") - self.logger.info("Writing calibrated instrument data to %s", out_fn) + self.logger.info("Writing combined group data to %s", out_fn) if Path(out_fn).exists(): Path(out_fn).unlink() self.combined_nc.to_netcdf(out_fn) @@ -1012,43 +575,18 @@ def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: ", ".join(sorted(self.combined_nc.variables)), ) - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - logs_dir = Path(self.args.base_path, vehicle, MISSIONLOGS, name) - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) - start_datetime = datetime.strptime(".".join(name.split(".")[:2]), "%Y.%j").astimezone( - UTC, - ) - self._define_sensor_info(start_datetime) - self._read_data(logs_dir, netcdfs_dir) - self.combined_nc = xr.Dataset() - - for sensor in self.sinfo: - getattr(self, sensor).cal_align_data = xr.Dataset() - self.logger.debug("Processing %s %s %s", vehicle, name, sensor) - try: - self._process(sensor, logs_dir, netcdfs_dir) - except EOFError as e: - short_name = vehicle.lower() - if vehicle == "Dorado389": - # For supporting pytest & conftest.py fixture - short_name = "dorado" - if sensor in EXPECTED_GROUPS[short_name]: - self.logger.error("Error processing %s: %s", sensor, e) # noqa: TRY400 - else: - self.logger.debug("Error processing %s: %s", sensor, e) - except ValueError: - self.logger.exception("Error processing %s", sensor) - except KeyError as e: - self.logger.error("Error processing %s: missing variable %s", sensor, e) # noqa: TRY400 - return netcdfs_dir def process_command_line(self): examples = "Examples:" + "\n\n" - examples += " Calibrate original data for some missions:\n" - examples += " " + sys.argv[0] + " --mission 2020.064.10\n" - examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n" + examples += " Combine original data from Group files for an LRAUV log file:\n" + examples += ( + " " + + sys.argv[0] + + " -v --log_file brizo/missionlogs/2025/" + + "20250909_20250915/20250914T080941/" + + "202509140809_202509150109.nc4\n" + ) parser = argparse.ArgumentParser( formatter_class=RawTextHelpFormatter, @@ -1056,28 +594,20 @@ def process_command_line(self): epilog=examples, ) - parser.add_argument( - "--base_path", - action="store", - default=BASE_PATH, - help=f"Base directory for missionlogs and missionnetcdfs, default: {BASE_PATH}", - ) - parser.add_argument( - "--auv_name", - action="store", - default="Dorado389", - help="Dorado389 (default), i2MAP, or Multibeam", - ) - parser.add_argument( - "--mission", - action="store", - help="Mission directory, e.g.: 2020.064.10", - ) parser.add_argument( "--noinput", action="store_true", help="Execute without asking for a response, e.g. to not ask to re-download file", ) + parser.add_argument( + "--log_file", + action="store", + help=( + "Path to the log file for the mission, e.g.: " + "brizo/missionlogs/2025/20250903_20250909/" + "20250905T072042/202509050720_202509051653.nc4" + ), + ) parser.add_argument( "--plot", action="store", @@ -1107,12 +637,9 @@ def process_command_line(self): if __name__ == "__main__": - cal_netcdf = Combine_NetCDF() - cal_netcdf.process_command_line() - cal_netcdf.calibration_dir = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" - p_start = time.time() - # Set process_gps=False to skip time consuming _nudge_pos() processing - # netcdf_dir = cal_netcdf.process_logs(process_gps=False) - netcdf_dir = cal_netcdf.process_logs() - cal_netcdf.write_netcdf(netcdf_dir) - cal_netcdf.logger.info("Time to process: %.2f seconds", (time.time() - p_start)) + combine = Combine_NetCDF() + combine.process_command_line() + start = time.time() + combine.combine_groups() + ##combine.write_netcdf() + combine.logger.info("Time to process: %.2f seconds", (time.time() - start)) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 6411d3ce..32584a18 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -24,6 +24,7 @@ BASE_LRAUV_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve() SUMMARY_SOURCE = "Original LRAUV data extracted from {}, group {}" GROUPS = ["navigation", "ctd", "ecopuck"] # Your actual group names +GROUP = "Group" # A literal in the filename to use for identifying group .nc files SCI_PARMS = { "/": [ @@ -222,7 +223,7 @@ def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: return netcdfs_dir def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, output_dir: Path): - """Extract variables from the root group to _Group_Universals.nc.""" + """Extract variables from the root group to _{GROUP}_Universals.nc.""" root_parms = SCIENG_PARMS.get("/", []) if not root_parms: return @@ -232,7 +233,7 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, outpu vars_to_extract = self._get_available_variables(src_dataset, root_parms) if vars_to_extract: - output_file = output_dir / f"{Path(log_file).stem}_Group_Universals.nc" + output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" self._create_netcdf_file(src_dataset, vars_to_extract, output_file) self.logger.info("Extracted root group '/' to %s", output_file) else: @@ -244,7 +245,7 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, outpu def _extract_single_group( self, src_dataset: netCDF4.Dataset, group_name: str, log_file: str, output_dir: Path ): - """Extract a single group to its own NetCDF file named like _Group_.nc.""" + "Extract a single group to its own NetCDF file named like _{GROUP}_.nc." group_parms = SCIENG_PARMS[group_name] try: @@ -254,7 +255,7 @@ def _extract_single_group( vars_to_extract = self._get_available_variables(src_group, group_parms) if vars_to_extract: - output_file = output_dir / f"{Path(log_file).stem}_Group_{group_name}.nc" + output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_{group_name}.nc" self._create_netcdf_file(src_group, vars_to_extract, output_file) self.logger.info("Extracted %s to %s", group_name, output_file) else: @@ -347,11 +348,12 @@ def _get_time_filters_for_variables( filtered = len(mono_indices) < len(time_data) if filtered: self.logger.info( - "Time coordinate %s: filtered %d non-monotonic points (%d -> %d)", + "Time coordinate %s: filtered %d non-monotonic points (%d -> %d), %.2f%%", time_coord_name, len(time_data) - len(mono_indices), len(time_data), len(mono_indices), + 100 * (len(time_data) - len(mono_indices)) / len(time_data), ) time_filters[time_coord_name] = {"indices": mono_indices, "filtered": filtered} From ff48d0d36dd9d8fec51e8a736da81735377d4674 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 28 Oct 2025 16:51:17 -0700 Subject: [PATCH 030/121] Remove inheritance of add_global_metadata() to simplify things. --- src/data/AUV.py | 26 +------------------------- src/data/correct_log_times.py | 3 +-- src/data/logs2netcdfs.py | 27 +++++++++++++++++++++++++-- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index 75accb25..c8bef718 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -9,9 +9,8 @@ """ import logging -from datetime import UTC, datetime +from datetime import datetime -import coards import numpy as np import xarray as xr @@ -28,29 +27,6 @@ def monotonic_increasing_time_indices(time_array: np.array) -> np.ndarray: return np.array(monotonic) -class AUV: - def add_global_metadata(self): - iso_now = datetime.now(UTC).isoformat() + "Z" - - self.nc_file.netcdf_version = "4" - self.nc_file.Conventions = "CF-1.6" - self.nc_file.date_created = iso_now - self.nc_file.date_update = iso_now - self.nc_file.date_modified = iso_now - self.nc_file.featureType = "trajectory" - - self.nc_file.comment = "" - - self.nc_file.time_coverage_start = ( - coards.from_udunits(self.time[0], self.time.units).isoformat() + "Z" - ) - self.nc_file.time_coverage_end = ( - coards.from_udunits(self.time[-1], self.time.units).isoformat() + "Z" - ) - - self.nc_file.distribution_statement = "Any use requires prior approval from MBARI" - - def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 nav_longitude: xr.DataArray, nav_latitude: xr.DataArray, diff --git a/src/data/correct_log_times.py b/src/data/correct_log_times.py index 6604cf22..0a417f84 100755 --- a/src/data/correct_log_times.py +++ b/src/data/correct_log_times.py @@ -18,7 +18,6 @@ from pathlib import Path from shutil import copyfile -from AUV import AUV from logs2netcdfs import AUV_NetCDF from readauvlog import log_record @@ -41,7 +40,7 @@ TIME = "time" -class TimeCorrect(AUV): +class TimeCorrect: logger = logging.getLogger(__name__) _handler = logging.StreamHandler() _handler.setFormatter(AUV_NetCDF._formatter) diff --git a/src/data/logs2netcdfs.py b/src/data/logs2netcdfs.py index 28da0359..33d4e315 100755 --- a/src/data/logs2netcdfs.py +++ b/src/data/logs2netcdfs.py @@ -17,15 +17,17 @@ import subprocess import sys import time +from datetime import UTC, datetime from http import HTTPStatus from pathlib import Path import aiofiles +import coards import numpy as np import requests from aiohttp import ClientSession from aiohttp.client_exceptions import ClientConnectorError -from AUV import AUV, monotonic_increasing_time_indices +from AUV import monotonic_increasing_time_indices from netCDF4 import Dataset from readauvlog import log_record @@ -57,7 +59,7 @@ class CustomException(Exception): pass -class AUV_NetCDF(AUV): +class AUV_NetCDF: logger = logging.getLogger(__name__) _handler = logging.StreamHandler() _formatter = logging.Formatter( @@ -662,6 +664,27 @@ def _remove_bad_values(self, netcdf_filename): self.nc_file.close() self.logger.info("Wrote (without bad values) %s", netcdf_filename) + def add_global_metadata(self): + iso_now = datetime.now(UTC).isoformat() + "Z" + + self.nc_file.netcdf_version = "4" + self.nc_file.Conventions = "CF-1.6" + self.nc_file.date_created = iso_now + self.nc_file.date_update = iso_now + self.nc_file.date_modified = iso_now + self.nc_file.featureType = "trajectory" + + self.nc_file.comment = "" + + self.nc_file.time_coverage_start = ( + coards.from_udunits(self.time[0], self.time.units).isoformat() + "Z" + ) + self.nc_file.time_coverage_end = ( + coards.from_udunits(self.time[-1], self.time.units).isoformat() + "Z" + ) + + self.nc_file.distribution_statement = "Any use requires prior approval from MBARI" + def _process_log_file(self, log_filename, netcdf_filename, src_dir=None): log_data = self.read(log_filename) if Path(netcdf_filename).exists(): From 0022af24de5bb9f83c743442c8768a7d4c6fc2c9 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 28 Oct 2025 21:57:05 -0700 Subject: [PATCH 031/121] Pass log_file and group_name down calling tree for better logs and metadata. --- src/data/nc42netcdfs.py | 203 ++++++++++++++++++++-------------------- 1 file changed, 103 insertions(+), 100 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 32584a18..aa3ace2b 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -12,12 +12,13 @@ import logging import os import sys +from datetime import UTC, datetime from pathlib import Path from typing import Any +import git import netCDF4 import pooch -import xarray as xr # Local directory that serves as the work area for log_files and netcdf files BASE_LRAUV_WEB = "https://dods.mbari.org/data/lrauv/" @@ -99,7 +100,9 @@ "DeadReckonUsingMultipleVelocitySources": [ { "name": "fix_residual_percent_distance_traveled", - "rename": "fix_residual_percent_distance_traveled_DeadReckonUsingMultipleVelocitySources", # noqa: E501 + "rename": ( + "fix_residual_percent_distance_traveled_DeadReckonUsingMultipleVelocitySources" + ), }, {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingMultipleVelocitySources"}, {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingMultipleVelocitySources"}, @@ -209,7 +212,7 @@ def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: self.logger.info("Extracting data from %s", input_file) with netCDF4.Dataset(input_file, "r") as src_dataset: # Extract root group first - self._extract_root_group(src_dataset, log_file, netcdfs_dir) + self._extract_root_group(log_file, "/", src_dataset, netcdfs_dir) # Extract all other groups all_groups = list(src_dataset.groups.keys()) @@ -218,11 +221,13 @@ def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: if group_name != "/" and group_name not in all_groups: self.logger.warning("Group %s not found in %s", group_name, input_file) continue - self._extract_single_group(src_dataset, group_name, log_file, netcdfs_dir) + self._extract_single_group(log_file, group_name, src_dataset, netcdfs_dir) return netcdfs_dir - def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, output_dir: Path): + def _extract_root_group( + self, log_file: str, group_name: str, src_dataset: netCDF4.Dataset, output_dir: Path + ): """Extract variables from the root group to _{GROUP}_Universals.nc.""" root_parms = SCIENG_PARMS.get("/", []) if not root_parms: @@ -234,7 +239,9 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, outpu if vars_to_extract: output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" - self._create_netcdf_file(src_dataset, vars_to_extract, output_file) + self._create_netcdf_file( + log_file, group_name, src_dataset, vars_to_extract, output_file + ) self.logger.info("Extracted root group '/' to %s", output_file) else: self.logger.warning("No requested variables found in root group '/'") @@ -243,7 +250,11 @@ def _extract_root_group(self, src_dataset: netCDF4.Dataset, log_file: str, outpu self.logger.warning("Could not extract root group '/': %s", e) def _extract_single_group( - self, src_dataset: netCDF4.Dataset, group_name: str, log_file: str, output_dir: Path + self, + log_file: str, + group_name: str, + src_dataset: netCDF4.Dataset, + output_dir: Path, ): "Extract a single group to its own NetCDF file named like _{GROUP}_.nc." group_parms = SCIENG_PARMS[group_name] @@ -256,7 +267,9 @@ def _extract_single_group( if vars_to_extract: output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_{group_name}.nc" - self._create_netcdf_file(src_group, vars_to_extract, output_file) + self._create_netcdf_file( + log_file, group_name, src_group, vars_to_extract, output_file + ) self.logger.info("Extracted %s to %s", group_name, output_file) else: self.logger.warning("No requested variables found in group %s", group_name) @@ -310,7 +323,7 @@ def _find_time_coordinate(self, src_group: netCDF4.Group) -> str: return "" def _get_time_filters_for_variables( - self, src_group: netCDF4.Group, vars_to_extract: list[str] + self, log_file: str, group_name: str, src_group: netCDF4.Group, vars_to_extract: list[str] ) -> dict[str, dict]: """Get time filtering information for time coordinates used by vars_to_extract. @@ -325,6 +338,10 @@ def _get_time_filters_for_variables( # Find all time coordinates used by variables in extraction list time_coords_found = set() + self.logger.debug( + "=================================== Group: %s =======================================", + group_name, + ) for var_name in vars_to_extract: if var_name in src_group.variables: var = src_group.variables[var_name] @@ -342,28 +359,32 @@ def _get_time_filters_for_variables( for time_coord_name in time_coords_found: time_var = src_group.variables[time_coord_name] time_data = time_var[:] + self.logger.debug("Time coordinate %s: %d points", time_coord_name, len(time_data)) mono_indices = self._get_monotonic_indices(time_data) # Check if filtering was actually needed filtered = len(mono_indices) < len(time_data) + comment = "" if filtered: - self.logger.info( - "Time coordinate %s: filtered %d non-monotonic points (%d -> %d), %.2f%%", - time_coord_name, - len(time_data) - len(mono_indices), - len(time_data), - len(mono_indices), - 100 * (len(time_data) - len(mono_indices)) / len(time_data), + comment = ( + f"Filtered {len(time_data) - len(mono_indices)} non-monotonic points " + f"({len(time_data)} -> {len(mono_indices)}), " + f"{100 * (len(time_data) - len(mono_indices)) / len(time_data):.2f}%" ) + self.logger.info("Time coordinate %s: %s", time_coord_name, comment) - time_filters[time_coord_name] = {"indices": mono_indices, "filtered": filtered} + time_filters[time_coord_name] = { + "indices": mono_indices, + "filtered": filtered, + "comment": comment, + } return time_filters def _is_time_variable(self, var_name: str, var) -> bool: """Check if a variable is a time coordinate variable.""" # Check name pattern - if "time" in var_name.lower(): + if var_name.lower().endswith("time"): return True # Check units @@ -379,37 +400,7 @@ def _get_monotonic_indices(self, time_data) -> list[int]: """Get indices for monotonic time values from time data array.""" mono_indices = [] if len(time_data) > 0: - mono_indices.append(0) # Always include first point - - for i in range(1, len(time_data)): - if time_data[i] > time_data[mono_indices[-1]]: - mono_indices.append(i) - - return mono_indices - - def _get_monotonic_time_indices(self, src_group: netCDF4.Group) -> tuple[list[int], bool]: - """Get indices for monotonically increasing time data. - - Returns: - list[int]: List of indices for monotonic time points - bool: True if filtering was applied - """ - # Check if time filtering is enabled - if not getattr(self.args, "filter_monotonic_time", True): - return [], False - - # Find the time coordinate variable using introspection - time_var_name = self._find_time_coordinate(src_group) - if not time_var_name: - # No time variable found, return all data - return [], False - - time_var = src_group.variables[time_var_name] - time_data = time_var[:] - - # Find monotonically increasing indices - mono_indices = [] - if len(time_data) > 0: + # TODO: What if first point is not valid? May need to a pre-filtering step. mono_indices.append(0) # Always include first point for i in range(1, len(time_data)): @@ -417,27 +408,13 @@ def _get_monotonic_time_indices(self, src_group: netCDF4.Group) -> tuple[list[in mono_indices.append(i) else: self.logger.debug( - "Non-monotonic time value at index %d: %s <= %s (var: %s)", + "Non-monotonic time value at index %8d: %17.6f <= %17.6f", i, time_data[i], time_data[mono_indices[-1]], - time_var_name, ) - total_points = len(time_data) - filtered_points = len(mono_indices) - - if filtered_points < total_points: - self.logger.warning( - "Filtered %d non-monotonic time points (kept %d/%d) for variable %s", - total_points - filtered_points, - filtered_points, - total_points, - time_var_name, - ) - return mono_indices, True - - return mono_indices, False + return mono_indices def _copy_variable_with_appropriate_time_filter( self, @@ -466,6 +443,7 @@ def _copy_variable_with_appropriate_time_filter( # This is a time coordinate variable that needs filtering time_indices = time_filters[var_name]["indices"] dst_var[:] = src_var[:][time_indices] + dst_var.setncattr("comment", time_filters[var_name]["comment"]) self.logger.debug("Applied time filtering to time coordinate %s", var_name) # Check if this variable depends on any filtered time dimensions @@ -561,21 +539,34 @@ def _create_dimensions_with_time_filters( dst_dataset.createDimension(dim_name, size) - def _create_netcdf_file( - self, src_group: netCDF4.Group, vars_to_extract: list[str], output_file: Path + def _create_netcdf_file( # noqa: PLR0913 + self, + log_file: str, + group_name: str, + src_group: netCDF4.Group, + vars_to_extract: list[str], + output_file: Path, ): """Create a new NetCDF file with the specified variables and monotonic time.""" # Get time filtering information for each time variable - time_filters = self._get_time_filters_for_variables(src_group, vars_to_extract) + time_filters = self._get_time_filters_for_variables( + log_file, group_name, src_group, vars_to_extract + ) with netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: # Copy global attributes self._copy_global_attributes(src_group, dst_dataset) + # Add standard global attributes + log_file = self.args.log_file + for attr_name, attr_value in self.global_metadata(log_file, group_name).items(): + dst_dataset.setncattr(attr_name, attr_value) + # Add note about time filtering if applied if any(tf["filtered"] for tf in time_filters.values()): dst_dataset.setncattr( - "processing_note", "Non-monotonic time values filtered out during extraction" + "processing_note", + "Non-monotonic time values filtered from original, see comment in variables", ) # Create dimensions - may need to adjust time dimension sizes @@ -659,37 +650,49 @@ def _copy_variable(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, except Exception as e: # noqa: BLE001 self.logger.warning("Failed to copy variable %s: %s", var_name, e) - def extract_groups_to_files(self, input_file, output_dir): - """Extract each group to a separate NetCDF file.""" - output_dir = Path(output_dir) - output_dir.mkdir(exist_ok=True, parents=True) - - all_groups = self.get_groups_netcdf4(input_file) - - self.logger.info("Extracting data from %s", input_file) - for group_name, group_parms in SCIENG_PARMS.items(): - if group_name not in all_groups: - self.logger.warning("Group %s not found in %s", group_name, input_file) - continue - try: - self.logger.info(" Group %s", group_name) - ds = xr.open_dataset(input_file, group=group_name) - output_file = output_dir / f"{group_name}.nc" - # Output only the variables of interest - parms = [p["name"] for p in group_parms if "name" in p] - self.logger.debug(" Variables to extract: %s", parms) - ds = ds[parms] - ds.to_netcdf(path=str(output_file), format="NETCDF4") - ds.close() - self.logger.info("Extracted %s to %s", group_name, output_file) - except (FileNotFoundError, OSError, ValueError): - self.logger.warning("Could not extract %s", group_name) - except KeyError: - self.logger.warning("Variable %s not found in group %s", parms, group_name) - except TypeError: - self.logger.warning( - "Type error processing group %s: %s", group_name, sys.exc_info() - ) + def global_metadata(self, log_file: str, group_name: str): + """Use instance variables to return a dictionary of + metadata specific for the data that are written + """ + repo = git.Repo(search_parent_directories=True) + try: + gitcommit = repo.head.object.hexsha + except (ValueError, BrokenPipeError) as e: + self.logger.warning( + "could not get head commit sha for %s: %s", + repo.remotes.origin.url, + e, + ) + gitcommit = "" + iso_now = datetime.now(UTC).isoformat() + "Z" + + metadata = {} + metadata["netcdf_version"] = "4" + metadata["Conventions"] = "CF-1.6" + metadata["date_created"] = iso_now + metadata["date_update"] = iso_now + metadata["date_modified"] = iso_now + + metadata["distribution_statement"] = "Any use requires prior approval from MBARI" + metadata["license"] = metadata["distribution_statement"] + metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." + metadata["history"] = f"Created by {self.commandline} on {iso_now}" + log_file = self.args.log_file + metadata["title"] = f"Extracted LRAUV data from {log_file}, Group: {group_name}" + metadata["source"] = ( + f"MBARI LRAUV data extracted from {log_file}" + f" with execution of '{self.commandline}' at {iso_now}" + f" using git commit {gitcommit} from" + f" software at 'https://github.com/mbari-org/auv-python'" + ) + metadata["summary"] = ( + "Observational oceanographic data obtained from a Long Range Autonomous" + " Underwater Vehicle mission with measurements at original sampling" + f" intervals. The data in group {group_name} have been extracted from the" + " original .nc4 log file with non-monotonic time values removed using" + " MBARI's auv-python software" + ) + return metadata def process_command_line(self): examples = "Examples:" + "\n\n" From a87f35f307b0eb01768701e69f52c90e07664f71 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 28 Oct 2025 21:58:18 -0700 Subject: [PATCH 032/121] WIP: Write out combined (_cal.nc) file. --- src/data/combine.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index f39a1bd0..e29963cb 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -181,12 +181,12 @@ def global_metadata(self): metadata["license"] = metadata["distribution_statement"] metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." metadata["history"] = f"Created by {self.commandline} on {iso_now}" - + log_file = self.args.log_file metadata["title"] = ( - f"Calibrated AUV sensor data from {self.args.auv_name} mission {self.args.mission}" + f"Combined LRAUV data from {log_file} - relevant variables extracted for STOQS" ) metadata["summary"] = ( - "Observational oceanographic data obtained from an Autonomous" + "Observational oceanographic data obtained from a Long Range Autonomous" " Underwater Vehicle mission with measurements at" " original sampling intervals. The data have been processed" " by MBARI's auv-python software." @@ -557,13 +557,23 @@ def combine_groups(self): group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) self.combined_nc = xr.Dataset() for group_file in group_files: - self.logger.info("Found group file: %s", group_file) + self.logger.info("Group file: %s", group_file.name) # Make nudged_longitude, nudged_latitude = self._nudge_pos() call on when appropriate + # Loop through each variable in the group file and add it to the combined_nc member list + with xr.open_dataset(group_file) as ds: + for orig_var in ds.variables: + if orig_var.lower().endswith("time"): + self.logger.debug("Skipping time variable: %s", orig_var) + continue + new_group = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() + new_var = new_group + "_" + orig_var.lower() + self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) + self.combined_nc[new_var] = ds[orig_var] def write_netcdf(self) -> None: log_file = self.args.log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - out_fn = Path(netcdfs_dir, f"{self.args.log_file.stem}_cal.nc") + out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_cal.nc") self.combined_nc.attrs = self.global_metadata() self.logger.info("Writing combined group data to %s", out_fn) @@ -641,5 +651,5 @@ def process_command_line(self): combine.process_command_line() start = time.time() combine.combine_groups() - ##combine.write_netcdf() + combine.write_netcdf() combine.logger.info("Time to process: %.2f seconds", (time.time() - start)) From e04751995818d46e85fbc8fb35c744ad9af3d4bb Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 30 Oct 2025 11:01:57 -0700 Subject: [PATCH 033/121] Remove unused options. --- src/data/nc42netcdfs.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index aa3ace2b..6b8b9942 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -705,42 +705,6 @@ def process_command_line(self): description=__doc__, epilog=examples, ) - - parser.add_argument( - "--base_path", - action="store", - default=BASE_LRAUV_PATH, - help=( - "Base directory for missionlogs and missionnetcdfs, " - "default: auv_data in repo data directory" - ), - ) - parser.add_argument( - "--title", - action="store", - help="A short description of the dataset", - ) - parser.add_argument( - "--summary", - action="store", - help="Additional information about the dataset", - ) - - parser.add_argument( - "--noinput", - action="store_true", - help="Execute without asking for a response, e.g. to not ask to re-download file", - ) - parser.add_argument( - "--clobber", - action="store_true", - help="Use with --noinput to overwrite existing downloaded log files", - ) - parser.add_argument( - "--noreprocess", - action="store_true", - help="Use with --noinput to not re-process existing downloaded log files", - ) parser.add_argument( "--filter_monotonic_time", action="store_true", From 972cf5162e6389f9a308a656e322163976af59ca Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 31 Oct 2025 12:19:04 -0700 Subject: [PATCH 034/121] Add reasonal bounds time coordinate filtering, add --plot_time option. --- src/data/nc42netcdfs.py | 317 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 288 insertions(+), 29 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 6b8b9942..0d0a46f2 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -18,8 +18,17 @@ import git import netCDF4 +import numpy as np import pooch +# Conditional imports for plotting (only when needed) +try: + import matplotlib.pyplot as plt # noqa: F401 + + MATPLOTLIB_AVAILABLE = True +except ImportError: + MATPLOTLIB_AVAILABLE = False + # Local directory that serves as the work area for log_files and netcdf files BASE_LRAUV_WEB = "https://dods.mbari.org/data/lrauv/" BASE_LRAUV_PATH = Path(__file__).parent.joinpath("../../data/lrauv_data").resolve() @@ -276,8 +285,8 @@ def _extract_single_group( except KeyError: self.logger.warning("Group %s not found", group_name) - except Exception as e: # noqa: BLE001 - self.logger.warning("Could not extract %s: %s", group_name, e) + # except Exception as e: # noqa: BLE001 + # self.logger.warning("Could not extract %s: %s", group_name, e) def _get_available_variables( self, src_group: netCDF4.Group, group_parms: list[dict[str, Any]] @@ -330,13 +339,36 @@ def _get_time_filters_for_variables( Returns: dict: Map of time_coord_name -> {"indices": list[int], "filtered": bool} """ - time_filters = {} - # Check if time filtering is enabled if not getattr(self.args, "filter_monotonic_time", True): - return time_filters + return {} + self.logger.info("========================= Group %s =========================", group_name) # Find all time coordinates used by variables in extraction list + time_coords_found = self._find_time_coordinates(group_name, src_group, vars_to_extract) + + # Parse plot time settings once + plot_group_name, plot_time_coord_name = self._parse_plot_time_argument() + + # Process each unique time coordinate found + time_filters = {} + for time_coord_name in sorted(time_coords_found): + time_filter = self._process_single_time_coordinate( + log_file, + group_name, + src_group, + time_coord_name, + plot_group_name, + plot_time_coord_name, + ) + time_filters[time_coord_name] = time_filter + + return time_filters + + def _find_time_coordinates( + self, group_name: str, src_group: netCDF4.Group, vars_to_extract: list[str] + ) -> set[str]: + """Find all time coordinates used by variables in extraction list.""" time_coords_found = set() self.logger.debug( "=================================== Group: %s =======================================", @@ -355,31 +387,111 @@ def _get_time_filters_for_variables( if self._is_time_variable(dim_name, dim_var): time_coords_found.add(dim_name) - # Now process each unique time coordinate found - for time_coord_name in time_coords_found: - time_var = src_group.variables[time_coord_name] - time_data = time_var[:] - self.logger.debug("Time coordinate %s: %d points", time_coord_name, len(time_data)) - mono_indices = self._get_monotonic_indices(time_data) - - # Check if filtering was actually needed - filtered = len(mono_indices) < len(time_data) - comment = "" - if filtered: - comment = ( - f"Filtered {len(time_data) - len(mono_indices)} non-monotonic points " - f"({len(time_data)} -> {len(mono_indices)}), " - f"{100 * (len(time_data) - len(mono_indices)) / len(time_data):.2f}%" - ) - self.logger.info("Time coordinate %s: %s", time_coord_name, comment) + return time_coords_found + + def _parse_plot_time_argument(self) -> tuple[str | None, str | None]: + """Parse the --plot_time argument and return (group_name, time_coord_name).""" + if not getattr(self.args, "plot_time", None): + return None, None + + plot_time = self.args.plot_time + if not plot_time.startswith("/"): + msg = "Invalid plot_time format, must be //" + raise ValueError(msg) + + slash_count = plot_time.count("/") + if slash_count == 1: + return "/", plot_time[1:] + if slash_count == 2: # noqa: PLR2004 + parts = plot_time.split("/")[1:] + return parts[0], parts[1] + + msg = "Invalid plot_time format, must be //" + raise ValueError(msg) + + def _create_plot_data( + self, log_file: str, group_name: str, time_coord_name: str, original_time_data + ) -> dict: + """Create plot data structure for time filtering visualization.""" + return { + "original": original_time_data.copy(), + "log_file": log_file, + "group_name": group_name, + "variable_name": time_coord_name, + } - time_filters[time_coord_name] = { - "indices": mono_indices, - "filtered": filtered, - "comment": comment, - } + def _create_time_filter_result( + self, mono_indices: list[int], time_data_length: int, time_coord_name: str + ) -> dict: + """Create the result dictionary for a time filter.""" + filtered = len(mono_indices) < time_data_length + comment = "" + if filtered: + removed_count = time_data_length - len(mono_indices) + removed_percent = 100 * removed_count / time_data_length + comment = ( + f"Filtered {removed_count} non-monotonic points " + f"({time_data_length} -> {len(mono_indices)}), " + f"{removed_percent:.2f}%" + ) + self.logger.info("Time coordinate %s: %s", time_coord_name, comment) - return time_filters + return { + "indices": mono_indices, + "filtered": filtered, + "comment": comment, + } + + def _process_single_time_coordinate( # noqa: PLR0913 + self, + log_file: str, + group_name: str, + src_group: netCDF4.Group, + time_coord_name: str, + plot_group_name: str | None, + plot_time_coord_name: str | None, + ) -> dict: + """Process filtering for a single time coordinate.""" + from scipy.signal import medfilt + + time_var = src_group.variables[time_coord_name] + original_time_data = time_var[:] + self.logger.info("Time coordinate %s: %d points", time_coord_name, len(original_time_data)) + + # Create plot data if this coordinate should be plotted + plot_data = None + should_plot = ( + plot_time_coord_name is not None + and time_coord_name == plot_time_coord_name + and group_name == plot_group_name + ) + if should_plot: + plot_data = self._create_plot_data( + log_file, group_name, time_coord_name, original_time_data + ) + + # First filter out values that fall outside of reasonable bounds + valid_indices = self._filter_valid_time_indices(original_time_data) + + # Despike to remove single point outliers before getting monotonic indices + time_data = medfilt(original_time_data[valid_indices], kernel_size=3) + + # Store valid indices and despiked data for plotting + if plot_data is not None: + plot_data["valid_indices"] = valid_indices + plot_data["valid_data"] = original_time_data[valid_indices] + plot_data["despiked"] = time_data.copy() + + # Now apply monotonic filtering to the valid subset + mono_indices = self._get_monotonic_indices(time_data) + + # Generate plot if requested for this variable + if plot_data is not None: + plot_data["final_indices"] = mono_indices + plot_data["final_data"] = time_data[mono_indices] + self._plot_time_filtering(plot_data) + + return self._create_time_filter_result(mono_indices, len(time_data), time_coord_name) def _is_time_variable(self, var_name: str, var) -> bool: """Check if a variable is a time coordinate variable.""" @@ -396,6 +508,50 @@ def _is_time_variable(self, var_name: str, var) -> bool: return False + def _filter_valid_time_indices(self, time_data) -> list[int]: + """Filter out wildly invalid time values before monotonic filtering. + + Returns indices of time values that are reasonable Unix epoch timestamps. + Uses numpy for efficient vectorized operations. + """ + # LRAUV data bounds: September 2012 to current + 5 years buffer + lrauv_start_date = datetime(2012, 9, 1, tzinfo=UTC) + current_date = datetime.now(UTC) + future_buffer_date = current_date.replace(year=current_date.year + 5) + + MIN_UNIX_TIME = int(lrauv_start_date.timestamp()) # September 1, 2012 UTC + MAX_UNIX_TIME = int(future_buffer_date.timestamp()) # Current + 5 years buffer + + # Convert to numpy array for efficient operations + time_array = np.asarray(time_data) + + # Create boolean masks for valid conditions + is_finite = np.isfinite(time_array) + is_in_range = (time_array >= MIN_UNIX_TIME) & (time_array <= MAX_UNIX_TIME) + + # Combine all conditions - all must be True for valid indices + valid_mask = is_finite & is_in_range + + # Get indices where all conditions are met + valid_indices = np.where(valid_mask)[0].tolist() + + # Log filtering statistics + total_count = len(time_array) + outliers_found = total_count - len(valid_indices) + + if outliers_found > 0: + non_finite = np.sum(~is_finite) + out_of_range = np.sum(~is_in_range & is_finite) + + self.logger.info( + "Pre-filtered %d invalid time values: %d non-finite, %d out-of-range", + outliers_found, + non_finite, + out_of_range, + ) + + return valid_indices + def _get_monotonic_indices(self, time_data) -> list[int]: """Get indices for monotonic time values from time data array.""" mono_indices = [] @@ -416,6 +572,92 @@ def _get_monotonic_indices(self, time_data) -> list[int]: return mono_indices + def _plot_time_filtering(self, plot_data: dict): + """Plot before and after time coordinate filtering.""" + if not MATPLOTLIB_AVAILABLE: + self.logger.error("Matplotlib not available. Install with: uv add matplotlib") + return + + # Import matplotlib here to avoid import errors when not needed + import matplotlib.pyplot as plt # noqa: F401 + + original = plot_data["original"] + valid_indices = plot_data["valid_indices"] + valid_data = plot_data["valid_data"] + despiked = plot_data["despiked"] + final_indices = plot_data["final_indices"] + final_data = plot_data["final_data"] + + # Create figure with subplots + fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(12, 9), sharex=True) + + # Plot 1: Original data + ax1.plot(original, "b-", label="Original", alpha=0.7) + ax1.set_ylabel("Time Value") + ax1.set_title( + f"Time Coordinate Filtering: {plot_data['variable_name']}\n" + f"File: {plot_data['log_file']}, Group: {plot_data['group_name']}" + ) + ax1.legend() + ax1.grid(visible=True, alpha=0.3) + + # Plot 2: After valid Values filtering + ax2.plot(valid_indices, valid_data, "m.-", label="After Valid Values Filter", alpha=0.7) + ax2.set_ylabel("Time Value") + ax2.legend() + ax2.grid(visible=True, alpha=0.3) + ax2.text( + 0.02, + 0.60, + f"Points removed: {len(original) - len(valid_data)}\n", + transform=ax2.transAxes, + verticalalignment="top", + bbox={"boxstyle": "round", "facecolor": "wheat"}, + ) + + # Plot 3: After despiking + ax3.plot(despiked, "g-", label="After Median Filter (3-point)", alpha=0.7) + ax3.set_ylabel("Time Value") + ax3.legend() + ax3.grid(visible=True, alpha=0.3) + ax3.text( + 0.02, + 0.60, + f"Points removed: {len(valid_data) - len(despiked)}\n", + transform=ax3.transAxes, + verticalalignment="top", + bbox={"boxstyle": "round", "facecolor": "wheat"}, + ) + + # Plot 4: Final After Monotonic filtered data + ax4.plot(final_indices, final_data, "r.-", label="After Monotonic Filter", alpha=0.7) + ax4.set_xlabel("Index") + ax4.set_ylabel("Time Value") + ax4.legend() + ax4.grid(visible=True, alpha=0.3) + + # Add statistics text + stats_text = ( + f"Points removed: {len(despiked) - len(final_data)}\n" + f"Original points: {len(original)}\n" + f"After final filter: {len(final_data)}\n" + f"Total removed: {len(original) - len(final_data)} " + f"({100 * (len(original) - len(final_data)) / len(original):.1f}%)" + ) + ax4.text( + 0.02, + 0.90, + stats_text, + transform=ax4.transAxes, + verticalalignment="top", + bbox={"boxstyle": "round", "facecolor": "wheat"}, + ) + + plt.tight_layout() + plt.show() + + self.logger.info("Time filtering plot displayed for %s", plot_data["variable_name"]) + def _copy_variable_with_appropriate_time_filter( self, src_group: netCDF4.Group, @@ -698,7 +940,14 @@ def process_command_line(self): examples = "Examples:" + "\n\n" examples += " Write to local missionnetcdfs direcory:\n" examples += " " + sys.argv[0] + " --mission 2020.064.10\n" - examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n" + examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n\n" + examples += " Plot time coordinate filtering:\n" + examples += ( + " " + + sys.argv[0] + + " --log_file brizo/missionlogs/2025/20250909_20250915/20250914T080941/" + + "202509140809_202509150109.nc4 --plot_time /latitude_time\n" + ) parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, @@ -756,6 +1005,16 @@ def process_command_line(self): help="Show the variable mapping: Group/variable_names -> their_renames", ), ) + parser.add_argument( + "--plot_time", + action="store", + metavar="VARIABLE_NAME", + help=( + "Plot before and after time coordinate filtering for the specified variable. " + "Shows the effect of outlier removal and monotonic filtering." + "Format for is /Group/variable_name." + ), + ) parser.add_argument( "-v", "--verbose", From 0b70c379431469269c39ec4786975c2ffeeb0236 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 31 Oct 2025 12:19:52 -0700 Subject: [PATCH 035/121] Add test for --plot_time. --- .vscode/launch.json | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index f9b60a3e..4c8e7641 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -54,8 +54,9 @@ "console": "integratedTerminal", // A small log_file that has a reasonable amount of data, and known_hash to verify download //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", "--known_hash", "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"] - // Has bad latitude and longitude values - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + // Has bad latitude and longitude values and lots of bad Universal latitude_time values + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"] }, { "name": "2.0 - calibrate.py", @@ -134,6 +135,15 @@ //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2023.192.01"], "args": ["-v", "1", "--auv_name", "dorado", "--mission", "2024.317.01"], }, + { + "name": "3.1 - align.py for LRAUV --log_file", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/src/data/align.py", + "console": "integratedTerminal", + "justMyCode": false, + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"], + }, { "name": "4.0 - resample.py", "type": "debugpy", From 371f1c521da3e5a809a2070758fe30220c9a255e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Sat, 1 Nov 2025 13:41:02 -0700 Subject: [PATCH 036/121] Output Group files as NetCDF3 each with one unlimited dimension. --- src/data/nc42netcdfs.py | 114 ++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 27 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 0d0a46f2..42d5e672 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -483,15 +483,22 @@ def _process_single_time_coordinate( # noqa: PLR0913 plot_data["despiked"] = time_data.copy() # Now apply monotonic filtering to the valid subset - mono_indices = self._get_monotonic_indices(time_data) + mono_indices_in_filtered = self._get_monotonic_indices(time_data) + + # Convert monotonic indices back to original array indices + # mono_indices_in_filtered are indices into the valid_indices subset + # We need to map them back to indices in the original time array + final_indices = [valid_indices[i] for i in mono_indices_in_filtered] # Generate plot if requested for this variable if plot_data is not None: - plot_data["final_indices"] = mono_indices - plot_data["final_data"] = time_data[mono_indices] + plot_data["final_indices"] = mono_indices_in_filtered + plot_data["final_data"] = time_data[mono_indices_in_filtered] self._plot_time_filtering(plot_data) - return self._create_time_filter_result(mono_indices, len(time_data), time_coord_name) + return self._create_time_filter_result( + final_indices, len(original_time_data), time_coord_name + ) def _is_time_variable(self, var_name: str, var) -> bool: """Check if a variable is a time coordinate variable.""" @@ -674,10 +681,6 @@ def _copy_variable_with_appropriate_time_filter( var_name, src_var.dtype, src_var.dimensions, - zlib=True, - complevel=6, - shuffle=True, - fletcher32=True, ) # Check if this variable itself is a time coordinate that needs filtering @@ -761,25 +764,86 @@ def _create_dimensions_with_time_filters( time_filters: dict[str, dict], ): """Create dimensions in the destination dataset, adjusting time dimensions if filtered.""" + # NetCDF3 allows only one unlimited dimension + primary_time_dim = self._find_primary_time_dimension(src_group, dims_needed, time_filters) + unlimited_dim_created = False + + for dim_name in dims_needed: + if dim_name not in src_group.dimensions: + continue + + src_dim = src_group.dimensions[dim_name] + should_be_unlimited = dim_name == primary_time_dim and not unlimited_dim_created + size = self._calculate_dimension_size( + dim_name, src_dim, time_filters, should_be_unlimited + ) + + # Track if we created the unlimited dimension + if size is None: + unlimited_dim_created = True + + dst_dataset.createDimension(dim_name, size) + + def _find_primary_time_dimension( + self, src_group: netCDF4.Group, dims_needed: set[str], time_filters: dict[str, dict] + ) -> str | None: + """Find the primary time dimension that should be unlimited in NetCDF3.""" for dim_name in dims_needed: if dim_name in src_group.dimensions: src_dim = src_group.dimensions[dim_name] + is_time_like = "time" in dim_name.lower() or dim_name in time_filters + if src_dim.isunlimited() and is_time_like: + return dim_name - # Check if this dimension corresponds to a filtered time variable - if dim_name in time_filters and time_filters[dim_name]["filtered"]: - # Use the number of filtered time points - filtered_size = len(time_filters[dim_name]["indices"]) - size = filtered_size if not src_dim.isunlimited() else None - self.logger.debug( - "Created filtered time dimension %s: %s -> %s", - dim_name, - len(src_dim), - size or filtered_size, - ) - else: - size = len(src_dim) if not src_dim.isunlimited() else None + # Fallback: return first unlimited dimension found + for dim_name in dims_needed: + if dim_name in src_group.dimensions and src_group.dimensions[dim_name].isunlimited(): + return dim_name - dst_dataset.createDimension(dim_name, size) + return None + + def _calculate_dimension_size( + self, + dim_name: str, + src_dim, + time_filters: dict[str, dict], + should_be_unlimited: bool, # noqa: FBT001 + ) -> int | None: + """Calculate the size for a dimension, handling NetCDF3 unlimited dimension constraint.""" + is_filtered_time = dim_name in time_filters and time_filters[dim_name]["filtered"] + + if is_filtered_time: + filtered_size = len(time_filters[dim_name]["indices"]) + if should_be_unlimited: + self.logger.debug( + "Created filtered unlimited time dimension %s: %s -> unlimited (%d points)", + dim_name, + len(src_dim), + filtered_size, + ) + return None # Unlimited + + self.logger.debug( + "Created filtered fixed time dimension %s: %s -> %s", + dim_name, + len(src_dim), + filtered_size, + ) + return filtered_size + + # Non-filtered dimension + if should_be_unlimited: + self.logger.debug("Created unlimited dimension %s", dim_name) + return None + + size = len(src_dim) + if src_dim.isunlimited(): + self.logger.debug( + "Converting unlimited dimension %s to fixed size %s (NetCDF3 limitation)", + dim_name, + size, + ) + return size def _create_netcdf_file( # noqa: PLR0913 self, @@ -795,7 +859,7 @@ def _create_netcdf_file( # noqa: PLR0913 log_file, group_name, src_group, vars_to_extract ) - with netCDF4.Dataset(output_file, "w", format="NETCDF4") as dst_dataset: + with netCDF4.Dataset(output_file, "w", format="NETCDF3_CLASSIC") as dst_dataset: # Copy global attributes self._copy_global_attributes(src_group, dst_dataset) @@ -876,10 +940,6 @@ def _copy_variable(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, var_name, src_var.dtype, src_var.dimensions, - zlib=True, - complevel=6, - shuffle=True, - fletcher32=True, ) # Copy data and attributes From c9f58d6db4f5341683ca180f50fe6a0bbd62c396 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 08:56:41 -0800 Subject: [PATCH 037/121] Remove unused methods, override coordinates attribute with just '_time' as these are time series, not yet trajectory data. --- src/data/nc42netcdfs.py | 41 +++-------------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 42d5e672..97cc3955 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -189,11 +189,6 @@ def download_with_pooch(self, url, local_dir, known_hash=None): downloader=downloader, ) - def get_groups_netcdf4(self, file_path): - """Get list of groups using netCDF4 library.""" - with netCDF4.Dataset(file_path, "r") as dataset: - return list(dataset.groups.keys()) - def extract_groups_to_files_netcdf4(self, log_file: str) -> Path: """Extract each group from .nc4 file to a separate .nc file using netCDF4 library. @@ -714,6 +709,8 @@ def _copy_variable_with_appropriate_time_filter( # Copy attributes for attr_name in src_var.ncattrs(): dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + # override any coordinates attribute with just the time coordinate + dst_var.setncattr("coordinates", var_name + "_time") self.logger.debug(" Copied variable: %s", var_name) @@ -872,7 +869,7 @@ def _create_netcdf_file( # noqa: PLR0913 if any(tf["filtered"] for tf in time_filters.values()): dst_dataset.setncattr( "processing_note", - "Non-monotonic time values filtered from original, see comment in variables", + "Non-monotonic time values filtered from original, see variable comments", ) # Create dimensions - may need to adjust time dimension sizes @@ -910,16 +907,6 @@ def _get_required_dimensions( dims_needed.update(var.dimensions) return dims_needed - def _create_dimensions( - self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, dims_needed: set[str] - ): - """Create dimensions in the destination dataset.""" - for dim_name in dims_needed: - if dim_name in src_group.dimensions: - src_dim = src_group.dimensions[dim_name] - size = len(src_dim) if not src_dim.isunlimited() else None - dst_dataset.createDimension(dim_name, size) - def _get_coordinate_variables( self, src_group: netCDF4.Group, dims_needed: set[str], vars_to_extract: list[str] ) -> list[str]: @@ -930,28 +917,6 @@ def _get_coordinate_variables( coord_vars.append(dim_name) # noqa: PERF401 return coord_vars - def _copy_variable(self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, var_name: str): - """Helper method to copy a variable from source to destination.""" - try: - src_var = src_group.variables[var_name] - - # Create variable in destination - dst_var = dst_dataset.createVariable( - var_name, - src_var.dtype, - src_var.dimensions, - ) - - # Copy data and attributes - dst_var[:] = src_var[:] - for attr_name in src_var.ncattrs(): - dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) - - self.logger.debug(" Copied variable: %s", var_name) - - except Exception as e: # noqa: BLE001 - self.logger.warning("Failed to copy variable %s: %s", var_name, e) - def global_metadata(self, log_file: str, group_name: str): """Use instance variables to return a dictionary of metadata specific for the data that are written From b06fafe5ac9b03c16fcd7dec605fe7c0cd963468 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 16:18:12 -0800 Subject: [PATCH 038/121] Use all fixed dimensions (no unlimited) and add attributes so that cf_xarray can be used in combine.py. --- src/data/nc42netcdfs.py | 98 ++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 97cc3955..5c4adc65 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -241,6 +241,15 @@ def _extract_root_group( self.logger.info("Extracting root group '/'") vars_to_extract = self._get_available_variables(src_dataset, root_parms) + # Add debugging output for root group processing + self.logger.info("=== ROOT GROUP DEBUG ===") + self.logger.info("Available variables: %s", sorted(vars_to_extract)) + self.logger.info("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) + self.logger.info( + "Available coordinate variables: %s", + [v for v in sorted(src_dataset.variables.keys()) if v in src_dataset.dimensions], + ) + if vars_to_extract: output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" self._create_netcdf_file( @@ -369,12 +378,14 @@ def _find_time_coordinates( "=================================== Group: %s =======================================", group_name, ) - for var_name in vars_to_extract: + # Sort variables to make processing deterministic + for var_name in sorted(vars_to_extract): if var_name in src_group.variables: var = src_group.variables[var_name] # Check each dimension to see if it's a time coordinate - for dim_name in var.dimensions: + # Sort dimensions to make processing deterministic + for dim_name in sorted(var.dimensions): if dim_name in src_group.variables: dim_var = src_group.variables[dim_name] @@ -660,7 +671,7 @@ def _plot_time_filtering(self, plot_data: dict): self.logger.info("Time filtering plot displayed for %s", plot_data["variable_name"]) - def _copy_variable_with_appropriate_time_filter( + def _copy_variable_with_appropriate_time_filter( # noqa: C901, PLR0912 self, src_group: netCDF4.Group, dst_dataset: netCDF4.Dataset, @@ -671,6 +682,18 @@ def _copy_variable_with_appropriate_time_filter( try: src_var = src_group.variables[var_name] + # Skip variables that use time dimensions with 0 points + for dim_name in src_var.dimensions: + if ( + dim_name in time_filters + and time_filters[dim_name]["filtered"] + and len(time_filters[dim_name]["indices"]) == 0 + ): + self.logger.debug( + "Skipping variable %s (uses dimension %s with 0 points)", var_name, dim_name + ) + return + # Create variable in destination dst_var = dst_dataset.createVariable( var_name, @@ -709,8 +732,17 @@ def _copy_variable_with_appropriate_time_filter( # Copy attributes for attr_name in src_var.ncattrs(): dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) - # override any coordinates attribute with just the time coordinate - dst_var.setncattr("coordinates", var_name + "_time") + if var_name in time_filters and time_filters[var_name]["filtered"]: + # Downstream process uses cf_xarray to recognize coordinates, add required attribute + dst_var.setncattr("standard_name", "time") + else: + # Override any coordinates attribute in src with just the time coordinate + dst_var.setncattr("coordinates", var_name + "_time") + # Downstream process uses cf_xarray to recognize coordinates, add required attribute + if var_name.startswith(("longitude", "latitude")): + dst_var.setncattr("units", "radians") + elif var_name.startswith("depth"): + dst_var.setncattr("units", "meters") self.logger.debug(" Copied variable: %s", var_name) @@ -761,65 +793,35 @@ def _create_dimensions_with_time_filters( time_filters: dict[str, dict], ): """Create dimensions in the destination dataset, adjusting time dimensions if filtered.""" - # NetCDF3 allows only one unlimited dimension - primary_time_dim = self._find_primary_time_dimension(src_group, dims_needed, time_filters) - unlimited_dim_created = False - + # Use fixed dimensions for all - simpler and avoids NetCDF3 unlimited dimension issues for dim_name in dims_needed: if dim_name not in src_group.dimensions: continue src_dim = src_group.dimensions[dim_name] - should_be_unlimited = dim_name == primary_time_dim and not unlimited_dim_created size = self._calculate_dimension_size( - dim_name, src_dim, time_filters, should_be_unlimited + dim_name, src_dim, time_filters, should_be_unlimited=False ) - # Track if we created the unlimited dimension - if size is None: - unlimited_dim_created = True + # Skip dimensions with 0 points to avoid NetCDF3 conflicts + if size == 0: + self.logger.debug("Skipping dimension %s with 0 points", dim_name) + continue dst_dataset.createDimension(dim_name, size) - def _find_primary_time_dimension( - self, src_group: netCDF4.Group, dims_needed: set[str], time_filters: dict[str, dict] - ) -> str | None: - """Find the primary time dimension that should be unlimited in NetCDF3.""" - for dim_name in dims_needed: - if dim_name in src_group.dimensions: - src_dim = src_group.dimensions[dim_name] - is_time_like = "time" in dim_name.lower() or dim_name in time_filters - if src_dim.isunlimited() and is_time_like: - return dim_name - - # Fallback: return first unlimited dimension found - for dim_name in dims_needed: - if dim_name in src_group.dimensions and src_group.dimensions[dim_name].isunlimited(): - return dim_name - - return None - def _calculate_dimension_size( self, dim_name: str, src_dim, time_filters: dict[str, dict], should_be_unlimited: bool, # noqa: FBT001 - ) -> int | None: - """Calculate the size for a dimension, handling NetCDF3 unlimited dimension constraint.""" + ) -> int: + """Calculate the size for a dimension - always returns fixed size for simplicity.""" is_filtered_time = dim_name in time_filters and time_filters[dim_name]["filtered"] if is_filtered_time: filtered_size = len(time_filters[dim_name]["indices"]) - if should_be_unlimited: - self.logger.debug( - "Created filtered unlimited time dimension %s: %s -> unlimited (%d points)", - dim_name, - len(src_dim), - filtered_size, - ) - return None # Unlimited - self.logger.debug( "Created filtered fixed time dimension %s: %s -> %s", dim_name, @@ -828,18 +830,16 @@ def _calculate_dimension_size( ) return filtered_size - # Non-filtered dimension - if should_be_unlimited: - self.logger.debug("Created unlimited dimension %s", dim_name) - return None - + # Non-filtered dimension - always fixed size size = len(src_dim) if src_dim.isunlimited(): self.logger.debug( - "Converting unlimited dimension %s to fixed size %s (NetCDF3 limitation)", + "Converting unlimited dimension %s to fixed size %s", dim_name, size, ) + else: + self.logger.debug("Created fixed dimension %s: %s", dim_name, size) return size def _create_netcdf_file( # noqa: PLR0913 From 8367bd41b019349c332dfc855af652bc73f2648b Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 16:34:03 -0800 Subject: [PATCH 039/121] Add nudged longitude and latitude variables to the combined_nc dataset. --- src/data/combine.py | 84 +++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index e29963cb..c186f68e 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -285,38 +285,6 @@ def _range_qc_combined_nc( # noqa: C901, PLR0912 self.combined_nc = self.combined_nc.drop_vars(qced_vars) self.logger.info("Done range checking %s", instrument) - def _nudge_pos(self, max_sec_diff_at_end=10): - """Apply linear nudges to underwater latitudes and longitudes so that - they match the surface gps positions. - """ - try: - lon = self.combined_nc["navigation_longitude"] - except KeyError: - error_message = "No navigation_longitude data in combined_nc" - raise EOFError(error_message) from None - lat = self.combined_nc["navigation_latitude"] - lon_fix = self.combined_nc["gps_longitude"] - lat_fix = self.combined_nc["gps_latitude"] - - # Use the shared function from AUV module - lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( - nav_longitude=lon, - nav_latitude=lat, - gps_longitude=lon_fix, - gps_latitude=lat_fix, - logger=self.logger, - auv_name=self.args.auv_name, - mission=self.args.mission, - max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=True, - ) - - # Store results in instance variables for compatibility - self.segment_count = segment_count - self.segment_minsum = segment_minsum - - return lon_nudged, lat_nudged - def _apply_plumbing_lag( self, sensor: str, @@ -551,6 +519,38 @@ def _geometric_depth_correction(self, sensor, orig_nc): return corrected_depth + def _nudge_pos(self, max_sec_diff_at_end=10): + """Apply linear nudges to underwater latitudes and longitudes so that + they match the surface gps positions. + """ + try: + lon = self.combined_nc["universals_longitude"] + except KeyError: + error_message = "No universals_longitude data in combined_nc" + raise EOFError(error_message) from None + lat = self.combined_nc["universals_latitude"] + lon_fix = self.combined_nc["nal9602_longitude_fix"] + lat_fix = self.combined_nc["nal9602_latitude_fix"] + + # Use the shared function from AUV module + lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( + nav_longitude=lon, + nav_latitude=lat, + gps_longitude=lon_fix, + gps_latitude=lat_fix, + logger=self.logger, + auv_name="", + mission="", + max_sec_diff_at_end=max_sec_diff_at_end, + create_plots=True, + ) + + # Store results in instance variables for compatibility + self.segment_count = segment_count + self.segment_minsum = segment_minsum + + return lon_nudged, lat_nudged + def combine_groups(self): log_file = self.args.log_file src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) @@ -558,7 +558,6 @@ def combine_groups(self): self.combined_nc = xr.Dataset() for group_file in group_files: self.logger.info("Group file: %s", group_file.name) - # Make nudged_longitude, nudged_latitude = self._nudge_pos() call on when appropriate # Loop through each variable in the group file and add it to the combined_nc member list with xr.open_dataset(group_file) as ds: for orig_var in ds.variables: @@ -570,6 +569,23 @@ def combine_groups(self): self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) self.combined_nc[new_var] = ds[orig_var] + # Add nudged longitude and latitude variables to the combined_nc dataset + nudged_longitude, nudged_latitude = self._nudge_pos() + self.combined_nc["nudged_longitude"] = nudged_longitude + self.combined_nc["nudged_longitude"].attrs = { + "long_name": "Nudged Longitude", + "standard_name": "longitude", + "units": "degrees_east", + "comment": "Dead reckoned longitude nudged to GPS positions", + } + self.combined_nc["nudged_latitude"] = nudged_latitude + self.combined_nc["nudged_latitude"].attrs = { + "long_name": "Nudged Latitude", + "standard_name": "latitude", + "units": "degrees_north", + "comment": "Dead reckoned latitude nudged to GPS positions", + } + def write_netcdf(self) -> None: log_file = self.args.log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) @@ -613,7 +629,7 @@ def process_command_line(self): "--log_file", action="store", help=( - "Path to the log file for the mission, e.g.: " + "Path to the log file of original LRAUV data, e.g.: " "brizo/missionlogs/2025/20250903_20250909/" "20250905T072042/202509050720_202509051653.nc4" ), From fb8a446f1ded197d3af13b0e81d66fd42950d9a8 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 16:35:28 -0800 Subject: [PATCH 040/121] Use cf_xarray accessors by axis to avoid using actual names for the time coorinate. --- src/data/AUV.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index c8bef718..4d09ced6 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -11,6 +11,7 @@ import logging from datetime import datetime +import cf_xarray # Needed for the .cf accessor # noqa: F401 import numpy as np import xarray as xr @@ -88,22 +89,21 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 if lon[:][segi].any(): lon_nudged_array = lon[segi] lat_nudged_array = lat[segi] - dt_nudged = lon.get_index("navigation_time")[segi] + dt_nudged = lon.cf["T"][segi] logger.debug( "Filled _nudged arrays with %d values starting at %s " "which were before the first GPS fix at %s", len(segi), - lat.get_index("navigation_time")[0], - lat_fix.get_index("gps_time")[0], + lat.cf["T"].data[0], + lat_fix.cf["T"].data[0], ) else: lon_nudged_array = np.array([]) lat_nudged_array = np.array([]) dt_nudged = np.array([], dtype="datetime64[ns]") if segi.any(): - seg_min = ( - lat.get_index("navigation_time")[segi][-1] - lat.get_index("navigation_time")[segi][0] - ).total_seconds() / 60 + # Return difference of numpy timestamps in units of minutes + seg_min = (lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]).astype("timedelta64[m]") else: seg_min = 0 logger.info( From 18a631b2a388e55f5fd6da5651b76fc571363030 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 6 Nov 2025 16:36:52 -0800 Subject: [PATCH 041/121] Add combine(). --- src/data/process.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/data/process.py b/src/data/process.py index 4dcedd38..fbb0428d 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -68,6 +68,7 @@ class data are: download_process and calibrate, while for LRAUV class data from align import Align_NetCDF, InvalidCalFile from archive import LOG_NAME, Archiver from calibrate import EXPECTED_SENSORS, Calibrate_NetCDF +from combine import Combine_NetCDF from create_products import CreateProducts from dorado_info import FAILED, TEST, dorado_info from emailer import NOTIFICATION_EMAIL, Emailer @@ -739,6 +740,8 @@ def extract(self, log_file: str) -> None: extract = Extract() extract.args = argparse.Namespace() extract.args.verbose = self.args.verbose + extract.args.log_file = self.args.log_file + extract.commandline = self.commandline extract.logger.setLevel(self._log_levels[self.args.verbose]) extract.logger.addHandler(self.log_handler) @@ -748,6 +751,23 @@ def extract(self, log_file: str) -> None: input_file = extract.download_with_pooch(url, output_dir) return extract.extract_groups_to_files_netcdf4(input_file) + def combine(self, log_file: str) -> None: + self.logger.info("Combining netCDF files for log file: %s", log_file) + self.logger.info( + "Equivalent to the calibrate step for Dorado class vehicles. " + "Adds nudge positions and more layers of quality control." + ) + combine = Combine_NetCDF() + combine.args = argparse.Namespace() + combine.args.verbose = self.args.verbose + combine.args.log_file = self.args.log_file + combine.commandline = self.commandline + combine.logger.setLevel(self._log_levels[self.args.verbose]) + combine.logger.addHandler(self.log_handler) + + combine.combine_groups() + combine.write_netcdf() + @log_file_processor def process_log_file(self, log_file: str) -> None: netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) @@ -764,7 +784,8 @@ def process_log_file(self, log_file: str) -> None: self.logger.info("commandline = %s", self.commandline) netcdfs_dir = self.extract(log_file) - # self.align(log_file) + self.combine(log_file=log_file) + self.align(log_file=log_file) # self.resample(log_file) # self.create_products(log_file) self.logger.info("Finished processing log file: %s", log_file) From bab21345a8fe99930cabe0e0eced4ff4e5de461d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 7 Nov 2025 10:30:57 -0800 Subject: [PATCH 042/121] Add _analyze_original_time_coordinates() to log warnings for log_files like brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4. --- src/data/nc42netcdfs.py | 115 +++++++++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 32 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 5c4adc65..4ae1fbe8 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -303,38 +303,6 @@ def _get_available_variables( self.logger.debug(" Variables to extract: %s", vars_to_extract) return vars_to_extract - def _find_time_coordinate(self, src_group: netCDF4.Group) -> str: - """Find the time coordinate variable in a group using introspection. - - Returns: - str: Name of the time coordinate variable, or empty string if not found - """ - # Strategy 1: Look for variables with "time" in the name (most common) - time_vars = [var_name for var_name in src_group.variables if "time" in var_name.lower()] - if time_vars: - # Prefer variables that start with 'time' (like time_NAL9602) - time_vars.sort(key=lambda x: (not x.lower().startswith("time"), x)) - self.logger.debug("Found time coordinate %s via name pattern", time_vars[0]) - return time_vars[0] - - # Strategy 2: Look for variables with time-like units - for var_name, var in src_group.variables.items(): - if hasattr(var, "units"): - units = getattr(var, "units", "").lower() - time_patterns = ["seconds since", "days since", "hours since"] - if any(pattern in units for pattern in time_patterns): - self.logger.debug("Found time coordinate %s via units", var_name) - return var_name - - # Strategy 3: Look for unlimited dimension (backup) - for dim_name, dim in src_group.dimensions.items(): - if dim.isunlimited() and dim_name in src_group.variables: - self.logger.debug("Found time coordinate %s via unlimited dimension", dim_name) - return dim_name - - self.logger.debug("No time coordinate found in group") - return "" - def _get_time_filters_for_variables( self, log_file: str, group_name: str, src_group: netCDF4.Group, vars_to_extract: list[str] ) -> dict[str, dict]: @@ -351,6 +319,10 @@ def _get_time_filters_for_variables( # Find all time coordinates used by variables in extraction list time_coords_found = self._find_time_coordinates(group_name, src_group, vars_to_extract) + # Add diagnostic check to compare original time coordinate values + if len(time_coords_found) > 1: + self._analyze_original_time_coordinates(src_group, time_coords_found, group_name) + # Parse plot time settings once plot_group_name, plot_time_coord_name = self._parse_plot_time_argument() @@ -369,6 +341,85 @@ def _get_time_filters_for_variables( return time_filters + def _analyze_original_time_coordinates( + self, src_group: netCDF4.Group, time_coords_found: set[str], group_name: str + ): + """Quick diagnostic for Dead Reckoned timing issues in root group.""" + # Only analyze root group Dead Reckoned coordinates + if group_name != "/": + return + + if ( + "latitude_time" not in time_coords_found + or "longitude_time" not in time_coords_found + or "latitude_time" not in src_group.variables + or "longitude_time" not in src_group.variables + ): + return + + lat_time = src_group.variables["latitude_time"][:] + lon_time = src_group.variables["longitude_time"][:] + + # Quick check for Dead Reckoned timing synchronization + min_len = min(len(lat_time), len(lon_time)) + if min_len == 0: + return + + # Compare overlapping portion + overlap_equal = np.array_equal(lat_time[:min_len], lon_time[:min_len]) + + if overlap_equal and len(lat_time) == len(lon_time): + self.logger.info( + "Dead Reckoned timing: latitude_time and longitude_time are properly synchronized" + ) + return + + # Calculate timing differences for diagnosis + time_diff = lon_time[:min_len] - lat_time[:min_len] + non_zero_mask = time_diff != 0 + num_differences = np.sum(non_zero_mask) + percent_different = 100.0 * num_differences / min_len + + if len(lat_time) != len(lon_time): + self.logger.warning( + "Dead Reckoned timing: Different array lengths - " + "latitude_time: %d, longitude_time: %d", + len(lat_time), + len(lon_time), + ) + + if num_differences > 0: + diff_values = time_diff[non_zero_mask] + max_abs_diff = np.max(np.abs(diff_values)) + + # Define thresholds for Dead Reckoned timing issues + MAJOR_PERCENT_THRESHOLD = 50.0 # 50% different points + MAJOR_TIME_THRESHOLD = 3600.0 # 1 hour difference + MINOR_PERCENT_THRESHOLD = 5.0 # 5% different points + MINOR_TIME_THRESHOLD = 60.0 # 1 minute difference + + if percent_different > MAJOR_PERCENT_THRESHOLD or max_abs_diff > MAJOR_TIME_THRESHOLD: + self.logger.warning( + "Dead Reckoned timing: Significant synchronization issues detected - " + "%.1f%% of coordinates have timing differences (max: %.1f seconds)", + percent_different, + max_abs_diff, + ) + elif percent_different > MINOR_PERCENT_THRESHOLD or max_abs_diff > MINOR_TIME_THRESHOLD: + self.logger.warning( + "Dead Reckoned timing: Minor synchronization issues detected - " + "%.1f%% of coordinates have timing differences (max: %.1f seconds)", + percent_different, + max_abs_diff, + ) + else: + self.logger.info( + "Dead Reckoned timing: Small timing differences detected - " + "%.1f%% of coordinates differ (max: %.1f seconds)", + percent_different, + max_abs_diff, + ) + def _find_time_coordinates( self, group_name: str, src_group: netCDF4.Group, vars_to_extract: list[str] ) -> set[str]: From 31c40305c58b963836451059e9adef68817a016d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 7 Nov 2025 11:24:27 -0800 Subject: [PATCH 043/121] Handle exceptions more explicitely, improve warning messages. --- src/data/nc42netcdfs.py | 172 ++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 4ae1fbe8..9f0e54f1 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -237,30 +237,26 @@ def _extract_root_group( if not root_parms: return - try: - self.logger.info("Extracting root group '/'") - vars_to_extract = self._get_available_variables(src_dataset, root_parms) + self.logger.info("Extracting root group '/'") + vars_to_extract, _ = self._get_available_variables(src_dataset, root_parms) + + # Add debugging output for root group processing + self.logger.info("=== ROOT GROUP DEBUG ===") + self.logger.info("Available variables: %s", sorted(vars_to_extract)) + self.logger.info("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) + self.logger.info( + "Available coordinate variables: %s", + [v for v in sorted(src_dataset.variables.keys()) if v in src_dataset.dimensions], + ) - # Add debugging output for root group processing - self.logger.info("=== ROOT GROUP DEBUG ===") - self.logger.info("Available variables: %s", sorted(vars_to_extract)) - self.logger.info("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) - self.logger.info( - "Available coordinate variables: %s", - [v for v in sorted(src_dataset.variables.keys()) if v in src_dataset.dimensions], + if vars_to_extract: + output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" + self._create_netcdf_file( + log_file, group_name, src_dataset, vars_to_extract, output_file ) - - if vars_to_extract: - output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_Universals.nc" - self._create_netcdf_file( - log_file, group_name, src_dataset, vars_to_extract, output_file - ) - self.logger.info("Extracted root group '/' to %s", output_file) - else: - self.logger.warning("No requested variables found in root group '/'") - - except Exception as e: # noqa: BLE001 - self.logger.warning("Could not extract root group '/': %s", e) + self.logger.info("Extracted root group '/' to %s", output_file) + else: + self.logger.warning("No requested variables found in root group '/'") def _extract_single_group( self, @@ -276,7 +272,7 @@ def _extract_single_group( self.logger.debug(" Group %s", group_name) src_group = src_dataset.groups[group_name] - vars_to_extract = self._get_available_variables(src_group, group_parms) + vars_to_extract, requested_vars = self._get_available_variables(src_group, group_parms) if vars_to_extract: output_file = output_dir / f"{Path(log_file).stem}_{GROUP}_{group_name}.nc" @@ -285,12 +281,12 @@ def _extract_single_group( ) self.logger.info("Extracted %s to %s", group_name, output_file) else: - self.logger.warning("No requested variables found in group %s", group_name) + self.logger.warning( + "No requested variables (%s) found in group %s", requested_vars, group_name + ) except KeyError: self.logger.warning("Group %s not found", group_name) - # except Exception as e: # noqa: BLE001 - # self.logger.warning("Could not extract %s: %s", group_name, e) def _get_available_variables( self, src_group: netCDF4.Group, group_parms: list[dict[str, Any]] @@ -301,7 +297,7 @@ def _get_available_variables( vars_to_extract = [var for var in requested_vars if var in available_vars] self.logger.debug(" Variables to extract: %s", vars_to_extract) - return vars_to_extract + return vars_to_extract, requested_vars def _get_time_filters_for_variables( self, log_file: str, group_name: str, src_group: netCDF4.Group, vars_to_extract: list[str] @@ -730,75 +726,79 @@ def _copy_variable_with_appropriate_time_filter( # noqa: C901, PLR0912 time_filters: dict[str, dict], ): """Copy a variable with appropriate time filtering applied.""" - try: - src_var = src_group.variables[var_name] - - # Skip variables that use time dimensions with 0 points - for dim_name in src_var.dimensions: - if ( - dim_name in time_filters - and time_filters[dim_name]["filtered"] - and len(time_filters[dim_name]["indices"]) == 0 - ): - self.logger.debug( - "Skipping variable %s (uses dimension %s with 0 points)", var_name, dim_name - ) - return + src_var = src_group.variables[var_name] + + # Skip variables that use time dimensions with 0 points + for dim_name in src_var.dimensions: + if ( + dim_name in time_filters + and time_filters[dim_name]["filtered"] + and len(time_filters[dim_name]["indices"]) == 0 + ): + self.logger.debug( + "Skipping variable %s (uses dimension %s with 0 points)", var_name, dim_name + ) + return - # Create variable in destination + # Create variable in destination + try: dst_var = dst_dataset.createVariable( var_name, src_var.dtype, src_var.dimensions, + zlib=True, + complevel=4, ) + except ValueError as e: + self.logger.warning( + "Could not create variable %s in destination dataset: %s. ", + var_name, + str(e), + ) + return - # Check if this variable itself is a time coordinate that needs filtering - if var_name in time_filters and time_filters[var_name]["filtered"]: - # This is a time coordinate variable that needs filtering - time_indices = time_filters[var_name]["indices"] - dst_var[:] = src_var[:][time_indices] - dst_var.setncattr("comment", time_filters[var_name]["comment"]) - self.logger.debug("Applied time filtering to time coordinate %s", var_name) - - # Check if this variable depends on any filtered time dimensions - elif src_var.dimensions: - # Find which (if any) of this variable's dimensions are filtered time coordinates - filtered_dims = {} - for dim_name in src_var.dimensions: - if dim_name in time_filters and time_filters[dim_name]["filtered"]: - filtered_dims[dim_name] = time_filters[dim_name]["indices"] - - if filtered_dims: - # Apply filtering for the appropriate dimensions - self._apply_multidimensional_time_filter( - src_var, dst_var, var_name, filtered_dims - ) - else: - # No time filtering needed - dst_var[:] = src_var[:] + # Check if this variable itself is a time coordinate that needs filtering + if var_name in time_filters and time_filters[var_name]["filtered"]: + # This is a time coordinate variable that needs filtering + time_indices = time_filters[var_name]["indices"] + dst_var[:] = src_var[:][time_indices] + dst_var.setncattr("comment", time_filters[var_name]["comment"]) + self.logger.debug("Applied time filtering to time coordinate %s", var_name) + + # Check if this variable depends on any filtered time dimensions + elif src_var.dimensions: + # Find which (if any) of this variable's dimensions are filtered time coordinates + filtered_dims = {} + for dim_name in src_var.dimensions: + if dim_name in time_filters and time_filters[dim_name]["filtered"]: + filtered_dims[dim_name] = time_filters[dim_name]["indices"] + + if filtered_dims: + # Apply filtering for the appropriate dimensions + self._apply_multidimensional_time_filter(src_var, dst_var, var_name, filtered_dims) else: - # Scalar variable or no dimensions + # No time filtering needed dst_var[:] = src_var[:] + else: + # Scalar variable or no dimensions + dst_var[:] = src_var[:] - # Copy attributes - for attr_name in src_var.ncattrs(): - dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) - if var_name in time_filters and time_filters[var_name]["filtered"]: - # Downstream process uses cf_xarray to recognize coordinates, add required attribute - dst_var.setncattr("standard_name", "time") - else: - # Override any coordinates attribute in src with just the time coordinate - dst_var.setncattr("coordinates", var_name + "_time") - # Downstream process uses cf_xarray to recognize coordinates, add required attribute - if var_name.startswith(("longitude", "latitude")): - dst_var.setncattr("units", "radians") - elif var_name.startswith("depth"): - dst_var.setncattr("units", "meters") - - self.logger.debug(" Copied variable: %s", var_name) - - except Exception as e: # noqa: BLE001 - self.logger.warning("Failed to copy variable %s: %s", var_name, e) + # Copy attributes + for attr_name in src_var.ncattrs(): + dst_var.setncattr(attr_name, src_var.getncattr(attr_name)) + if var_name in time_filters and time_filters[var_name]["filtered"]: + # Downstream process uses cf_xarray to recognize coordinates, add required attribute + dst_var.setncattr("standard_name", "time") + else: + # Override any coordinates attribute in src with just the time coordinate + dst_var.setncattr("coordinates", var_name + "_time") + # Downstream process uses cf_xarray to recognize coordinates, add required attribute + if var_name.startswith(("longitude", "latitude")): + dst_var.setncattr("units", "radians") + elif var_name.startswith("depth"): + dst_var.setncattr("units", "meters") + + self.logger.debug(" Copied variable: %s", var_name) def _apply_multidimensional_time_filter( self, src_var, dst_var, var_name: str, filtered_dims: dict[str, list[int]] From 40d0610860b6ff02ccf3e58d24659246b19cb55d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:10:20 -0800 Subject: [PATCH 044/121] Add --log_file option and generalize for doroado or lrauv processing. --- src/data/align.py | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 9e07d43e..f8603424 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -34,6 +34,7 @@ TIME60HZ, AUV_NetCDF, ) +from nc42netcdfs import BASE_LRAUV_PATH from scipy.interpolate import interp1d @@ -127,16 +128,20 @@ def global_metadata(self): return metadata - def process_cal(self, vehicle: str = "", name: str = "") -> None: # noqa: C901, PLR0912, PLR0915 + def process_cal(self, vehicle: str = "", name: str = "", log_file: str = "") -> None: # noqa: C901, PLR0912, PLR0915 name = name or self.args.mission vehicle = vehicle or self.args.auv_name - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) - in_fn = f"{vehicle}_{name}_cal.nc" - try: - self.calibrated_nc = xr.open_dataset(Path(netcdfs_dir, in_fn)) - except ValueError as e: - raise InvalidCalFile(e) from e - self.logger.info("Processing %s from %s", in_fn, netcdfs_dir) + if name and vehicle: + netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) + src_file = Path(netcdfs_dir, f"{vehicle}_{name}_cal.nc") + elif log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(log_file).parent}") + src_file = Path(netcdfs_dir, f"{Path(log_file).stem}_cal.nc") + else: + msg = "Must provide either mission and vehicle or log_file" + raise ValueError(msg) + self.calibrated_nc = xr.open_dataset(src_file) + self.logger.info("Processing %s", src_file) self.aligned_nc = xr.Dataset() self.min_time = datetime.now(UTC) self.max_time = datetime(1970, 1, 1, tzinfo=UTC) @@ -178,7 +183,7 @@ def process_cal(self, vehicle: str = "", name: str = "") -> None: # noqa: C901, bounds_error=False, ) except KeyError: - error_message = f"No nudged_latitude data in {in_fn}" + error_message = f"No nudged_latitude data in {src_file}" raise InvalidCalFile(error_message) from None lon_interp = interp1d( self.calibrated_nc["nudged_longitude"].get_index("time").view(np.int64).tolist(), @@ -278,7 +283,7 @@ def process_cal(self, vehicle: str = "", name: str = "") -> None: # noqa: C901, ) self.aligned_nc[f"{instr}_latitude"].attrs = self.calibrated_nc["nudged_latitude"].attrs self.aligned_nc[f"{instr}_latitude"].attrs["comment"] += ( - f". Variable nudged_latitude from {in_fn} file linearly" + f". Variable nudged_latitude from {src_file} file linearly" f" interpolated onto {variable.split('_')[0]} time values." ) self.aligned_nc[f"{instr}_latitude"].attrs["long_name"] = "Latitude" @@ -294,7 +299,7 @@ def process_cal(self, vehicle: str = "", name: str = "") -> None: # noqa: C901, "nudged_longitude" ].attrs self.aligned_nc[f"{instr}_longitude"].attrs["comment"] += ( - f". Variable nudged_longitude from {in_fn} file linearly" + f". Variable nudged_longitude from {src_file} file linearly" f" interpolated onto {variable.split('_')[0]} time values." ) self.aligned_nc[f"{instr}_longitude"].attrs["long_name"] = "Longitude" @@ -373,6 +378,15 @@ def process_command_line(self): action="store", help="Mission directory, e.g.: 2020.064.10", ) + parser.add_argument( + "--log_file", + action="store", + help=( + "Path to the log file of original LRAUV data, e.g.: " + "brizo/missionlogs/2025/20250903_20250909/" + "20250905T072042/202509050720_202509051653.nc4" + ), + ) parser.add_argument( "--plot", action="store_true", @@ -401,6 +415,10 @@ def process_command_line(self): align_netcdf = Align_NetCDF() align_netcdf.process_command_line() p_start = time.time() - netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_netcdf(netcdf_dir) + if align_netcdf.args.auv_name and align_netcdf.args.mission: + netcdf_dir = align_netcdf.process_cal() + align_netcdf.write_netcdf(netcdf_dir) + elif align_netcdf.args.log_file: + netcdf_dir = align_netcdf.process_cal(log_file=align_netcdf.args.log_file) + align_netcdf.write_netcdf(netcdf_dir) align_netcdf.logger.info("Time to process: %.2f seconds", (time.time() - p_start)) From 060f097e0bb595bf4e8e042b22e8907ea1be9b24 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:11:45 -0800 Subject: [PATCH 045/121] Get seg_min as float(seconds) / 60.0. --- src/data/AUV.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index 4d09ced6..8dd913e1 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -103,7 +103,9 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 dt_nudged = np.array([], dtype="datetime64[ns]") if segi.any(): # Return difference of numpy timestamps in units of minutes - seg_min = (lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]).astype("timedelta64[m]") + seg_min = (lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]).astype( + "timedelta64[s]" + ).astype(float) / 60.0 else: seg_min = 0 logger.info( From 8bcfeb169b647e9cb4fda42cfb39b8b07b26f61b Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:47:07 -0800 Subject: [PATCH 046/121] Update docstring, remove unused methods, convert lon & lat from radians to degrees. --- src/data/combine.py | 257 ++++++++------------------------------------ 1 file changed, 42 insertions(+), 215 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index c186f68e..e5bb19a4 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -1,18 +1,21 @@ #!/usr/bin/env python """ -Combine original LRAUV data from separate .nc files and produce a single NetCDF -file that also contains corrected (nudged) latitudes and longitudes. +Combine original LRAUV data from separate *_Group_*.nc files and produce a +single NetCDF file that also contains corrected (nudged) latitudes and +longitudes. Read original data from netCDF files created by nc42netcdfs.py and write out a single netCDF file with the important variables at original sampling intervals. -Geometric alignment and any plumbing lag corrections are also done during this -step. This script is similar to calibrate.py that is used for Dorado and i2map -data, but does not apply any sensor calibrations as those are done on the LRAUV -vehicles before the data is logged and unserialized to NetCDF-4 files. The QC -methods implemented in calibrate.py will be reused here. +Any geometric alignment and any plumbing lag corrections can also be done during +this step. This script is similar to calibrate.py that is used for Dorado and +i2map data, but does not apply any sensor calibrations as those are done on the +LRAUV vehicles before the data is logged and unserialized to NetCDF4 files. The +QC methods implemented in calibrate.py may also be reused here. The calbrate.py +code is wrapped around the concept of "sensor" which has an anaolog in this code +of "group", but is too different to easily reuse. The file will contain combined variables (the combined_nc member variable) and -be analogous to the original NetCDF-4. Rather than using groups in NetCDF-4 the +be analogous to the original NetCDF4. Rather than using groups in NetCDF4 the data will be written in classic NetCDF-CF with a naming convention that is similar to Dorado data, with group names (any underscores removed) preceeding the variable name with an underscore - all lower case characters: @@ -25,8 +28,10 @@ _latitude _longitude ``` -The file will be named with a "_cal.nc" suffix to be consistent with the Dorado -and i2map files, indicating the stage of processing. +The file will be named with a "_combined.nc" suffix. It is analogous to the +"_cal.nc" suffix used for Dorado and i2map files and will provide a clear +indication of the stage of processing. The data are suiable for input to the +align.py script. """ @@ -43,10 +48,8 @@ from socket import gethostname from typing import NamedTuple import cf_xarray # Needed for the .cf accessor # noqa: F401 -import matplotlib.pyplot as plt import numpy as np import xarray as xr -from scipy.interpolate import interp1d import pandas as pd from AUV import monotonic_increasing_time_indices, nudge_positions @@ -61,11 +64,8 @@ class Range(NamedTuple): max: float -# Using lower case vehicle names, modify in _define_sensor_info() for changes -# over time Used to reduce ERROR & WARNING log messages for expected missing -# sensor data. There are core data common to most all vehicles, whose groups -# are listed in BASE_GROUPS. EXPECTED_GROUPS contains additional groups for -# specific vehicles. +# There are core data common to most all vehicles, whose groups are listed in +# BASE_GROUPS. EXPECTED_GROUPS contains additional groups for specific vehicles. BASE_GROUPS = { "lrauv": [ "CTDSeabird", @@ -74,75 +74,13 @@ class Range(NamedTuple): } EXPECTED_GROUPS = { - "dorado": [ - "navigation", - "gps", - "depth", - "ecopuck", - "hs2", - "ctd1", - "ctd2", - "isus", - "biolume", - "lopc", - "tailcone", - ], - "i2map": [ - "navigation", - "gps", - "depth", - "seabird25p", - "transmissometer", - "tailcone", + "pontus": [ + "WetLabsUBAT", ], } -# Used in test fixture in conftetst.py -EXPECTED_GROUPS["Dorado389"] = EXPECTED_GROUPS["dorado"] - - -def align_geom(sensor_offset, pitches): - """Use x & y sensor_offset values in meters from sensor_info and - pitch in degrees to compute and return actual depths of the sensor - based on the geometry relative to the vehicle's depth sensor. - """ - # See https://en.wikipedia.org/wiki/Rotation_matrix - # - # * instrument location with pitch applied - # / | - # / | - # / | - # / | - # / | - # / | - # / | - # / | - # / | - # / - # / - # / y - # / _ - # / o - # / f - # / f - # / * instrument location - # / | - # / \ | | - # / \ | y - # / pitch (theta) | | - # / \ | | - # --------------------x------------------+ --> nose - # - # [ cos(pitch) -sin(pitch) ] [x] [x'] - # X = - # [ sin(pitch) cos(pitch) ] [y] [y'] - offsets = [] - for pitch in pitches: - theta = pitch * np.pi / 180.0 - R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) - x_off, y_off = np.matmul(R, sensor_offset) - offsets.append(y_off) - - return offsets +# Combine the BASE_GROUPS into each EXPECTED_GROUPS entry +for vehicle, groups in EXPECTED_GROUPS.items(): + EXPECTED_GROUPS[vehicle] = groups + BASE_GROUPS["lrauv"] class Combine_NetCDF: @@ -285,31 +223,6 @@ def _range_qc_combined_nc( # noqa: C901, PLR0912 self.combined_nc = self.combined_nc.drop_vars(qced_vars) self.logger.info("Done range checking %s", instrument) - def _apply_plumbing_lag( - self, - sensor: str, - time_index: pd.DatetimeIndex, - time_name: str, - ) -> tuple[xr.DataArray, str]: - """ - Apply plumbing lag to a time index in the combined netCDF file. - """ - # Convert lag_secs to milliseconds as np.timedelta64 neeeds an integer - lagged_time = time_index - np.timedelta64( - int(self.sinfo[sensor]["lag_secs"] * 1000), - "ms", - ) - # Need to update the sensor's time coordinate in the combined netCDF file - # so that DataArrays created with lagged_time fit onto the coordinate - self.combined_nc.coords[f"{sensor}_{time_name}"] = xr.DataArray( - lagged_time, - coords=[lagged_time], - dims={f"{sensor}_{time_name}"}, - name=f"{sensor}_{time_name}", - ) - lag_info = f"with plumbing lag correction of {self.sinfo[sensor]['lag_secs']} seconds" - return lagged_time, lag_info - def _biolume_process(self, sensor): try: orig_nc = getattr(self, sensor).orig_data @@ -418,117 +331,16 @@ def _biolume_process(self, sensor): set_to_nan=True, ) - def _geometric_depth_correction(self, sensor, orig_nc): - """Performs the align_geom() function from the legacy Matlab. - Works for any sensor, but requires navigation being processed first - as its variables in combined_nc are required. Returns corrected depth - array. - """ - # Fix pitch values to first and last points for interpolation to time - # values outside the range of the pitch values. - # See https://stackoverflow.com/a/45446546 - # and https://github.com/scipy/scipy/issues/12707#issuecomment-672794335 - try: - p_interp = interp1d( - self.combined_nc["navigation_time"].to_numpy().tolist(), - self.combined_nc["navigation_pitch"].to_numpy(), - fill_value=( - self.combined_nc["navigation_pitch"].to_numpy()[0], - self.combined_nc["navigation_pitch"].to_numpy()[-1], - ), - bounds_error=False, - ) - except KeyError: - error_message = "No navigation_time or navigation_pitch in combined_nc." - raise EOFError(error_message) from None - pitch = p_interp(orig_nc["time"].to_numpy().tolist()) - - d_interp = interp1d( - self.combined_nc["depth_time"].to_numpy().tolist(), - self.combined_nc["depth_filtdepth"].to_numpy(), - fill_value=( - self.combined_nc["depth_filtdepth"].to_numpy()[0], - self.combined_nc["depth_filtdepth"].to_numpy()[-1], - ), - bounds_error=False, - ) - orig_depth = d_interp(orig_nc["time"].to_numpy().tolist()) - offs_depth = align_geom(self.sinfo[sensor]["sensor_offset"], pitch) - - corrected_depth = xr.DataArray( - (orig_depth - offs_depth).astype(np.float64).tolist(), - coords=[orig_nc.get_index("time")], - dims={f"{sensor}_time"}, - name=f"{sensor}_depth", - ) - # 2008.289.03 has self.combined_nc["depth_time"][-1] (2008-10-16T15:42:32) - # at lot less than orig_nc["time"][-1] (2008-10-16T16:24:43) - # which, with "extrapolate" causes wildly incorrect depths to -359 m - # There may be other cases where this happens, in which case we'd like - # a general solution. For now, we'll just correct this mission. - d_beg_time_diff = ( - orig_nc["time"].to_numpy()[0] - self.combined_nc["depth_time"].to_numpy()[0] - ) - d_end_time_diff = ( - orig_nc["time"].to_numpy()[-1] - self.combined_nc["depth_time"].to_numpy()[-1] - ) - self.logger.info( - "%s: d_beg_time_diff: %s, d_end_time_diff: %s", - sensor, - d_beg_time_diff.astype("timedelta64[s]"), - d_end_time_diff.astype("timedelta64[s]"), - ) - if self.args.mission in ( - "2008.289.03", - "2010.259.01", - "2010.259.02", - ): - # This could be a more general check for all missions, but let's restrict it - # to known problematic missions for now. The above info message can help - # determine if this is needed for other missions. - self.logger.info( - "%s: Special QC for mission %s: Setting corrected_depth to NaN for times after %s", - sensor, - self.args.mission, - self.combined_nc["depth_time"][-1].to_numpy(), - ) - corrected_depth[ - np.where( - orig_nc.get_index("time") > self.combined_nc["depth_time"].to_numpy()[-1], - ) - ] = np.nan - if self.args.plot: - plt.figure(figsize=(18, 6)) - plt.plot( - orig_nc["time"].to_numpy(), - orig_depth, - "-", - orig_nc["time"].to_numpy(), - corrected_depth, - "--", - orig_nc["time"].to_numpy(), - pitch, - ".", - ) - plt.ylabel("Depth (m) & Pitch (deg)") - plt.legend(("Original depth", "Pitch corrected depth", "Pitch")) - plt.title( - f"Original and pitch corrected depth for {self.args.auv_name} {self.args.mission}", - ) - plt.show() - - return corrected_depth - def _nudge_pos(self, max_sec_diff_at_end=10): """Apply linear nudges to underwater latitudes and longitudes so that they match the surface gps positions. """ try: - lon = self.combined_nc["universals_longitude"] + lon = self.combined_nc["universals_longitude"] * 180.0 / np.pi except KeyError: error_message = "No universals_longitude data in combined_nc" raise EOFError(error_message) from None - lat = self.combined_nc["universals_latitude"] + lat = self.combined_nc["universals_latitude"] * 180.0 / np.pi lon_fix = self.combined_nc["nal9602_longitude_fix"] lat_fix = self.combined_nc["nal9602_latitude_fix"] @@ -555,6 +367,7 @@ def combine_groups(self): log_file = self.args.log_file src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) + self.summary_fields = set() self.combined_nc = xr.Dataset() for group_file in group_files: self.logger.info("Group file: %s", group_file.name) @@ -567,10 +380,23 @@ def combine_groups(self): new_group = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() new_var = new_group + "_" + orig_var.lower() self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) - self.combined_nc[new_var] = ds[orig_var] + if ( + orig_var in ("latitude", "longitude") + and ds[orig_var].attrs.get("units") == "radians" + ): + # Convert radians to degrees + self.combined_nc[new_var] = ds[orig_var] * 180.0 / np.pi + self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() + self.combined_nc[new_var].attrs["units"] = "degrees" + else: + self.combined_nc[new_var] = ds[orig_var] # Add nudged longitude and latitude variables to the combined_nc dataset - nudged_longitude, nudged_latitude = self._nudge_pos() + try: + nudged_longitude, nudged_latitude = self._nudge_pos() + except ValueError as e: + self.logger.error("Nudging positions failed: %s", e) # noqa: TRY400 + return self.combined_nc["nudged_longitude"] = nudged_longitude self.combined_nc["nudged_longitude"].attrs = { "long_name": "Nudged Longitude", @@ -589,7 +415,7 @@ def combine_groups(self): def write_netcdf(self) -> None: log_file = self.args.log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_cal.nc") + out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") self.combined_nc.attrs = self.global_metadata() self.logger.info("Writing combined group data to %s", out_fn) @@ -600,6 +426,7 @@ def write_netcdf(self) -> None: "Data variables written: %s", ", ".join(sorted(self.combined_nc.variables)), ) + self.logger.info("Wrote combined (_combined.nc) netCDF file: %s", out_fn) return netcdfs_dir From 3c7ccdebd910a33c4e8c1ecde276b8e2f09ae0d5 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:48:59 -0800 Subject: [PATCH 047/121] Have combine.py write a *_combined.nc file. --- LRAUV_WORKFLOW.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md index 98307671..4fcfc26f 100644 --- a/LRAUV_WORKFLOW.md +++ b/LRAUV_WORKFLOW.md @@ -13,24 +13,26 @@ on the local file system's work directory is as follows: │ │ ├── <- e.g.: ahi, brizo, pontus, tethys, ... │ │ │ ├── missionlogs/year/dlist_dir │ │ │ │ ├── <- e.g.: ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4 - │ │ │ │ │ ├── <- .nc4 file containing original data + │ │ │ │ │ ├── <- .nc4 file containing original data - created by unserialize │ │ │ │ │ ├── <- .nc files, one for each group from the .nc4 file - | | | | | | data identical to original in NETCDF4 format - │ │ │ │ │ ├── <_cal> <- A single NETCDF3 .nc file containing all the - | | | | | | varibles from the .nc files along with nudged - | | | | | | latitudes and longitudes - created by combine.py + | | | | | | data identical to original in NetCDF4 format, + | | | | | | but in more interoperable NetCDF3 format + | | | | | | - created by nc42netcdfs.py + │ │ │ │ │ ├── <_combined> <- A single NetCDF3 .nc file containing all the + | | | | | | varibles from the .nc files along with nudged + | | | | | | latitudes and longitudes - created by combine.py │ │ │ │ │ ├── <_align> <- .nc file with all measurement variables | | | | | | having associated coordinate variables - | | | | | | at original instrument sampling rate - - | | | | | | created by align.py + | | | | | | at original instrument sampling rate + | | | | | | - created by align.py │ │ │ │ │ ├── <_nS> <- .nc file with all measurement variables resampled to a common time grid at n Second intervals - created by resample.py nc42netcdfs.py Extract the groups and the variables we want from the groups into - individual .nc files. These data are saved using NETCDF4 format as - there are many unlimited dimensions that are not allowed in NETCDF3. + individual .nc files. These data are saved using NetCDF4 format as + there are many unlimited dimensions that are not allowed in NetCDF3. The data in the .nc files are identical to what is in the .nc4 groups. combine.py From e18d5de98c4b4379ce8623dd7774cb2cfc830471 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 10:49:32 -0800 Subject: [PATCH 048/121] Start testing align.py for lrauv log_files. --- .vscode/launch.json | 13 +++++++++---- src/data/process.py | 7 +++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 4c8e7641..59e53006 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -54,9 +54,12 @@ "console": "integratedTerminal", // A small log_file that has a reasonable amount of data, and known_hash to verify download //"args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250908_20250912/20250911T201546/202509112015_202509112115.nc4", "--known_hash", "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85"] - // Has bad latitude and longitude values and lots of bad Universal latitude_time values + // brizo 20250914T080941 has bad latitude and longitude values and lots of bad Universal latitude_time and longitude_time values //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"] + // brizo 20250916T230652 has several ESP Samples from stoqs_lrauv_sep2025 + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"] }, { "name": "2.0 - calibrate.py", @@ -112,7 +115,8 @@ "program": "${workspaceFolder}/src/data/combine.py", "console": "integratedTerminal", "justMyCode": false, - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] }, { "name": "3.0 - align.py", @@ -321,7 +325,8 @@ "console": "integratedTerminal", //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] - "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] }, diff --git a/src/data/process.py b/src/data/process.py index fbb0428d..6856f3db 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -300,7 +300,7 @@ def calibrate(self, mission: str) -> None: cal_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 cal_netcdf.logger.removeHandler(self.log_handler) - def align(self, mission: str) -> None: + def align(self, mission: str = "", log_file: str = "") -> None: self.logger.info("Alignment steps for %s", mission) align_netcdf = Align_NetCDF() align_netcdf.args = argparse.Namespace() @@ -313,7 +313,10 @@ def align(self, mission: str) -> None: align_netcdf.logger.addHandler(self.log_handler) align_netcdf.commandline = self.commandline try: - netcdf_dir = align_netcdf.process_cal() + if log_file: + netcdf_dir = align_netcdf.process_cal(log_file=log_file) + else: + netcdf_dir = align_netcdf.process_cal() align_netcdf.write_netcdf(netcdf_dir) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 From 475cbec99a9bb9eeefd1a683e3bd970f36460535 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 12:30:57 -0800 Subject: [PATCH 049/121] Add additional diagnostic mesages for severe dead reckoned time sync problems. --- src/data/nc42netcdfs.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 9f0e54f1..fe3f286e 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -401,6 +401,24 @@ def _analyze_original_time_coordinates( percent_different, max_abs_diff, ) + self.logger.warning( + "Dead Reckoned timing: Differences begin at index %d", + np.where(non_zero_mask)[0][0], + ) + lon_subset = lon_time[ + max(0, np.where(non_zero_mask)[0][0] - 5) : np.where(non_zero_mask)[0][0] + 5 + ] + lat_subset = lat_time[ + max(0, np.where(non_zero_mask)[0][0] - 5) : np.where(non_zero_mask)[0][0] + 5 + ] + self.logger.warning( + "Dead Reckoned timing: longitude_time around this index: %s", + " ".join(f"{val:14.2f}" for val in lon_subset), + ) + self.logger.warning( + "Dead Reckoned timing: latitude_time around this index: %s", + " ".join(f"{val:14.2f}" for val in lat_subset), + ) elif percent_different > MINOR_PERCENT_THRESHOLD or max_abs_diff > MINOR_TIME_THRESHOLD: self.logger.warning( "Dead Reckoned timing: Minor synchronization issues detected - " From 455ccc5c0c10222c251721c5e29992a919a705e4 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 10 Nov 2025 19:05:00 -0800 Subject: [PATCH 050/121] Read in Group files with decode_cf=False, use xr.DataArray() to construct new variables. --- src/data/combine.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index e5bb19a4..0d72af4e 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -107,13 +107,13 @@ def global_metadata(self): metadata["featureType"] = "trajectory" try: metadata["time_coverage_start"] = str( - self.combined_nc["depth_time"].to_pandas().iloc[0].isoformat(), + pd.to_datetime(self.combined_nc["depth_time"].values, unit="s")[0].isoformat(), ) except KeyError: error_message = "No depth_time variable in combined_nc" raise EOFError(error_message) from None metadata["time_coverage_end"] = str( - self.combined_nc["depth_time"].to_pandas().iloc[-1].isoformat(), + pd.to_datetime(self.combined_nc["depth_time"].values, unit="s")[-1].isoformat(), ) metadata["distribution_statement"] = "Any use requires prior approval from MBARI" metadata["license"] = metadata["distribution_statement"] @@ -371,25 +371,36 @@ def combine_groups(self): self.combined_nc = xr.Dataset() for group_file in group_files: self.logger.info("Group file: %s", group_file.name) - # Loop through each variable in the group file and add it to the combined_nc member list - with xr.open_dataset(group_file) as ds: + with xr.open_dataset(group_file, decode_cf=False) as ds: + # New group name is loawercase with underscores removed + group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() + for orig_var in ds.variables: if orig_var.lower().endswith("time"): - self.logger.debug("Skipping time variable: %s", orig_var) + self.logger.info("Skipping time variable: %s", orig_var) continue - new_group = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() - new_var = new_group + "_" + orig_var.lower() + new_var = group_name + "_" + orig_var.lower() self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) if ( orig_var in ("latitude", "longitude") and ds[orig_var].attrs.get("units") == "radians" ): # Convert radians to degrees - self.combined_nc[new_var] = ds[orig_var] * 180.0 / np.pi + self.combined_nc[new_var] = xr.DataArray( + ds[orig_var].to_numpy() * 180.0 / np.pi, + coords=ds[orig_var].coords, + dims=ds[orig_var].dims, + ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() self.combined_nc[new_var].attrs["units"] = "degrees" + else: - self.combined_nc[new_var] = ds[orig_var] + self.combined_nc[new_var] = xr.DataArray( + ds[orig_var].to_numpy(), + coords=ds[orig_var].coords, + dims=ds[orig_var].dims, + ) + self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() # Add nudged longitude and latitude variables to the combined_nc dataset try: From b31fb8d675b5c934a8894e84d69c8e9bfbef6f33 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 11 Nov 2025 10:17:51 -0800 Subject: [PATCH 051/121] Add _consolidate_group_time_coords() and set dims and coords from its analysis. --- src/data/combine.py | 119 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 7 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 0d72af4e..f8c109e8 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -363,6 +363,100 @@ def _nudge_pos(self, max_sec_diff_at_end=10): return lon_nudged, lat_nudged + def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict: + """Analyze and consolidate time coordinates for a group. + + Returns: + dict: Contains consolidated time info with keys: + - consolidated_time_name: name of consolidated coordinate (or None) + - consolidated_time_data: the time coordinate data (or None) + - time_coord_mapping: dict mapping original dims to consolidated dims + """ + # Find all time variables in this group + time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("time")} + + if not time_vars: + return { + "consolidated_time_name": None, + "consolidated_time_data": None, + "time_coord_mapping": {}, + } + + if len(time_vars) == 1: + # Single time coordinate - use it as consolidated + time_name = list(time_vars.keys())[0] + consolidated_name = f"{group_name}_time" + self.logger.info( + "Group %s: Single time coordinate '%s' - using as '%s'", + group_name, + time_name, + consolidated_name, + ) + return { + "consolidated_time_name": consolidated_name, + "consolidated_time_data": ds[time_name], + "time_coord_mapping": {time_name: consolidated_name}, + } + + # Multiple time coordinates - check if they're identical + time_arrays = list(time_vars.values()) + first_time = time_arrays[0] + first_time_name = list(time_vars.keys())[0] + + all_identical = True + for i, (_name, time_array) in enumerate(time_vars.items()): + if i == 0: + continue # Skip first one (reference) + + # Compare sizes first + if len(time_array) != len(first_time): + all_identical = False + break + + # Compare values with tolerance + try: + if not np.allclose(time_array.values, first_time.values, atol=1e-6): + all_identical = False + break + except TypeError: + # Handle datetime arrays + if not np.array_equal(time_array.values, first_time.values): + all_identical = False + break + + if all_identical: + # All time coordinates are identical - consolidate them + consolidated_name = f"{group_name}_time" + time_coord_mapping = dict.fromkeys(time_vars, consolidated_name) + + self.logger.info( + "Group %s: All %d time coordinates identical - consolidating to '%s'", + group_name, + len(time_vars), + consolidated_name, + ) + + return { + "consolidated_time_name": consolidated_name, + "consolidated_time_data": ds[first_time_name], + "time_coord_mapping": time_coord_mapping, + } + + # Time coordinates differ - keep them separate + time_coord_mapping = {name: f"{group_name}_{name.lower()}" for name in time_vars} + + self.logger.warning( + "Group %s: Time coordinates differ - keeping separate: %s", + group_name, + list(time_vars.keys()), + ) + + return { + "consolidated_time_name": None, + "consolidated_time_data": None, + "time_coord_mapping": time_coord_mapping, + } + def combine_groups(self): log_file = self.args.log_file src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) @@ -372,12 +466,12 @@ def combine_groups(self): for group_file in group_files: self.logger.info("Group file: %s", group_file.name) with xr.open_dataset(group_file, decode_cf=False) as ds: - # New group name is loawercase with underscores removed + # Group name to prepend variable names is lowercase with underscores removed group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() + time_info = self._consolidate_group_time_coords(ds, group_name) for orig_var in ds.variables: if orig_var.lower().endswith("time"): - self.logger.info("Skipping time variable: %s", orig_var) continue new_var = group_name + "_" + orig_var.lower() self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) @@ -388,20 +482,31 @@ def combine_groups(self): # Convert radians to degrees self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy() * 180.0 / np.pi, - coords=ds[orig_var].coords, - dims=ds[orig_var].dims, + dims=[time_info["time_coord_mapping"][ds[orig_var].dims[0]]], + coords=[ds[orig_var].get_index(orig_var + "_time")], ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() self.combined_nc[new_var].attrs["units"] = "degrees" - else: self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy(), - coords=ds[orig_var].coords, - dims=ds[orig_var].dims, + dims=[time_info["time_coord_mapping"][ds[orig_var].dims[0]]], + coords=[ds[orig_var].get_index(orig_var + "_time")], ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() + # Construct useful comment for consolidated time coordinate + if time_info["consolidated_time_name"] in self.combined_nc.variables: + mapping_info = ", ".join( + [ + f"{orig} -> {new}" + for orig, new in time_info["time_coord_mapping"].items() + ] + ) + self.combined_nc[time_info["consolidated_time_name"]].attrs["comment"] = ( + f"Consolidated time coordinate from: {mapping_info}" + ) + # Add nudged longitude and latitude variables to the combined_nc dataset try: nudged_longitude, nudged_latitude = self._nudge_pos() From e956d0330136d29ee6059e821baf4098ebc04447 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 11 Nov 2025 10:18:30 -0800 Subject: [PATCH 052/121] Only set units to radians for lat & lon in the / group. --- src/data/nc42netcdfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index fe3f286e..ed778f82 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -811,7 +811,7 @@ def _copy_variable_with_appropriate_time_filter( # noqa: C901, PLR0912 # Override any coordinates attribute in src with just the time coordinate dst_var.setncattr("coordinates", var_name + "_time") # Downstream process uses cf_xarray to recognize coordinates, add required attribute - if var_name.startswith(("longitude", "latitude")): + if src_group.name == "/" and var_name.startswith(("longitude", "latitude")): dst_var.setncattr("units", "radians") elif var_name.startswith("depth"): dst_var.setncattr("units", "meters") From 50376aa518c9667b17ad9986fc4dec88dbca4720 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 11 Nov 2025 12:09:15 -0800 Subject: [PATCH 053/121] Add required metadata for cf decoding, write intermediate file so that cf decoding can be used for nudge_positions(). --- src/data/combine.py | 64 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index f8c109e8..bcb96414 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -107,13 +107,13 @@ def global_metadata(self): metadata["featureType"] = "trajectory" try: metadata["time_coverage_start"] = str( - pd.to_datetime(self.combined_nc["depth_time"].values, unit="s")[0].isoformat(), + pd.to_datetime(self.combined_nc["universals_time"].values, unit="s")[0].isoformat(), ) except KeyError: - error_message = "No depth_time variable in combined_nc" + error_message = "No universals_time variable in combined_nc" raise EOFError(error_message) from None metadata["time_coverage_end"] = str( - pd.to_datetime(self.combined_nc["depth_time"].values, unit="s")[-1].isoformat(), + pd.to_datetime(self.combined_nc["universals_time"].values, unit="s")[-1].isoformat(), ) metadata["distribution_statement"] = "Any use requires prior approval from MBARI" metadata["license"] = metadata["distribution_statement"] @@ -332,19 +332,20 @@ def _biolume_process(self, sensor): ) def _nudge_pos(self, max_sec_diff_at_end=10): - """Apply linear nudges to underwater latitudes and longitudes so that - they match the surface gps positions. + """Match variables from lrauv processing to those needed by + AUV.nudged_positions() so that linear nudges to underwater dead reckoned + positions will match the GPS positions at the surface. """ try: - lon = self.combined_nc["universals_longitude"] * 180.0 / np.pi + lon = self.combined_nc["universals_longitude"] except KeyError: error_message = "No universals_longitude data in combined_nc" raise EOFError(error_message) from None - lat = self.combined_nc["universals_latitude"] * 180.0 / np.pi + lat = self.combined_nc["universals_latitude"] lon_fix = self.combined_nc["nal9602_longitude_fix"] lat_fix = self.combined_nc["nal9602_latitude_fix"] - # Use the shared function from AUV module + # Use the shared nudge_positions() function from AUV module lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( nav_longitude=lon, nav_latitude=lat, @@ -357,10 +358,6 @@ def _nudge_pos(self, max_sec_diff_at_end=10): create_plots=True, ) - # Store results in instance variables for compatibility - self.segment_count = segment_count - self.segment_minsum = segment_minsum - return lon_nudged, lat_nudged def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict: @@ -465,6 +462,7 @@ def combine_groups(self): self.combined_nc = xr.Dataset() for group_file in group_files: self.logger.info("Group file: %s", group_file.name) + # Open group file without decoding to have np.allclose work properly with xr.open_dataset(group_file, decode_cf=False) as ds: # Group name to prepend variable names is lowercase with underscores removed group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() @@ -474,15 +472,15 @@ def combine_groups(self): if orig_var.lower().endswith("time"): continue new_var = group_name + "_" + orig_var.lower() + dim_name = time_info["time_coord_mapping"][ds[orig_var].dims[0]] self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) if ( orig_var in ("latitude", "longitude") and ds[orig_var].attrs.get("units") == "radians" ): - # Convert radians to degrees self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy() * 180.0 / np.pi, - dims=[time_info["time_coord_mapping"][ds[orig_var].dims[0]]], + dims=[dim_name], coords=[ds[orig_var].get_index(orig_var + "_time")], ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() @@ -490,11 +488,17 @@ def combine_groups(self): else: self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy(), - dims=[time_info["time_coord_mapping"][ds[orig_var].dims[0]]], + dims=[dim_name], coords=[ds[orig_var].get_index(orig_var + "_time")], ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() + # Add metadata required for cf_xarray decoding + self.combined_nc[new_var].coords[dim_name].attrs["units"] = ( + "seconds since 1970-01-01T00:00:00Z" + ) + self.combined_nc[new_var].coords[dim_name].attrs["standard_name"] = "time" + # Construct useful comment for consolidated time coordinate if time_info["consolidated_time_name"] in self.combined_nc.variables: mapping_info = ", ".join( @@ -507,6 +511,12 @@ def combine_groups(self): f"Consolidated time coordinate from: {mapping_info}" ) + # Write out an intermediate netCDF file so that cf_xarray can decode + # the data properly for nudging positions + intermediate_file = self._intermediate_write_netcdf() + with xr.open_dataset(intermediate_file, decode_cf=True) as ds: + self.combined_nc = ds.load() + # Add nudged longitude and latitude variables to the combined_nc dataset try: nudged_longitude, nudged_latitude = self._nudge_pos() @@ -527,6 +537,30 @@ def combine_groups(self): "units": "degrees_north", "comment": "Dead reckoned latitude nudged to GPS positions", } + # Remove the intermediate file + Path(intermediate_file).unlink() + + def _intermediate_write_netcdf(self) -> None: + """Write out an intermediate combined netCDF file so that data can be + read using decode_cf=True for nudge_positions() to work with cf accessors.""" + log_file = self.args.log_file + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined_intermediate.nc") + + self.combined_nc.attrs = self.global_metadata() + self.logger.info("Writing intermediate combined group data to %s", out_fn) + if Path(out_fn).exists(): + Path(out_fn).unlink() + self.combined_nc.to_netcdf(out_fn) + self.logger.info( + "Data variables written: %s", + ", ".join(sorted(self.combined_nc.variables)), + ) + self.logger.info( + "Wrote intermediate (_combined_intermediate.nc) netCDF file: %s", + out_fn, + ) + return out_fn def write_netcdf(self) -> None: log_file = self.args.log_file From 019bdb13b76cfb6bebf8c3b0cb0a81c3db15d8a1 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 11 Nov 2025 16:34:23 -0800 Subject: [PATCH 054/121] WIP: Add time coordinate(s) to combined_nc - still having problems in writing all the universals to the file. --- src/data/combine.py | 46 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index bcb96414..63f123a3 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -427,9 +427,8 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic time_coord_mapping = dict.fromkeys(time_vars, consolidated_name) self.logger.info( - "Group %s: All %d time coordinates identical - consolidating to '%s'", - group_name, - len(time_vars), + "%-65s %s", + f"Consoliding {len(time_vars)} coordinates to", consolidated_name, ) @@ -468,6 +467,39 @@ def combine_groups(self): group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() time_info = self._consolidate_group_time_coords(ds, group_name) + # Add time coordinate(s) to combined_nc + if time_info["consolidated_time_name"]: + self.logger.info( + "Adding consolidated time coordinate %-45s %s", + f"{time_info['consolidated_time_name']} as", + time_info["consolidated_time_name"], + ) + self.combined_nc[time_info["consolidated_time_name"]] = xr.DataArray( + time_info["consolidated_time_data"].to_numpy(), + dims=[time_info["consolidated_time_name"]], + coords={ + time_info["consolidated_time_name"]: time_info[ + "consolidated_time_data" + ].to_numpy() + }, + ) + self.combined_nc[time_info["consolidated_time_name"]].attrs = time_info[ + "consolidated_time_data" + ].attrs.copy() + else: + for orig_time_var, new_time_var in time_info["time_coord_mapping"].items(): + self.logger.info( + "Adding time coordinate %-58s %s", + f"{orig_time_var} as", + new_time_var, + ) + self.combined_nc[new_time_var] = xr.DataArray( + ds[orig_time_var].to_numpy(), + dims=[new_time_var], + coords={new_time_var: ds[orig_time_var].to_numpy()}, + ) + self.combined_nc[new_time_var].attrs = ds[orig_time_var].attrs.copy() + for orig_var in ds.variables: if orig_var.lower().endswith("time"): continue @@ -481,7 +513,9 @@ def combine_groups(self): self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy() * 180.0 / np.pi, dims=[dim_name], - coords=[ds[orig_var].get_index(orig_var + "_time")], + coords={ + dim_name: ds[orig_var].get_index(orig_var + "_time").to_numpy() + }, ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() self.combined_nc[new_var].attrs["units"] = "degrees" @@ -489,7 +523,9 @@ def combine_groups(self): self.combined_nc[new_var] = xr.DataArray( ds[orig_var].to_numpy(), dims=[dim_name], - coords=[ds[orig_var].get_index(orig_var + "_time")], + coords={ + dim_name: ds[orig_var].get_index(orig_var + "_time").to_numpy() + }, ) self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() From d34cf2b5ac60fc8aed67cf3db9019b17f4d7e0b7 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 12 Nov 2025 13:17:19 -0800 Subject: [PATCH 055/121] Factor out several small methods to make combine_groups() less complex, add more log statements. --- src/data/combine.py | 317 ++++++++++++++++++++++++++------------------ 1 file changed, 188 insertions(+), 129 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 63f123a3..05a48307 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -331,35 +331,6 @@ def _biolume_process(self, sensor): set_to_nan=True, ) - def _nudge_pos(self, max_sec_diff_at_end=10): - """Match variables from lrauv processing to those needed by - AUV.nudged_positions() so that linear nudges to underwater dead reckoned - positions will match the GPS positions at the surface. - """ - try: - lon = self.combined_nc["universals_longitude"] - except KeyError: - error_message = "No universals_longitude data in combined_nc" - raise EOFError(error_message) from None - lat = self.combined_nc["universals_latitude"] - lon_fix = self.combined_nc["nal9602_longitude_fix"] - lat_fix = self.combined_nc["nal9602_latitude_fix"] - - # Use the shared nudge_positions() function from AUV module - lon_nudged, lat_nudged, segment_count, segment_minsum = nudge_positions( - nav_longitude=lon, - nav_latitude=lat, - gps_longitude=lon_fix, - gps_latitude=lat_fix, - logger=self.logger, - auv_name="", - mission="", - max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=True, - ) - - return lon_nudged, lat_nudged - def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dict: """Analyze and consolidate time coordinates for a group. @@ -408,17 +379,37 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic # Compare sizes first if len(time_array) != len(first_time): all_identical = False + self.logger.debug( + "Group %s: Time coordinate '%s' length %d differs from '%s' length %d", + group_name, + _name, + len(time_array), + first_time_name, + len(first_time), + ) break # Compare values with tolerance try: if not np.allclose(time_array.values, first_time.values, atol=1e-6): all_identical = False + self.logger.debug( + "Group %s: Time coordinate '%s' values differ from '%s'", + group_name, + _name, + first_time_name, + ) break except TypeError: # Handle datetime arrays if not np.array_equal(time_array.values, first_time.values): all_identical = False + self.logger.debug( + "Group %s: Time coordinate '%s' values differ from '%s'", + group_name, + _name, + first_time_name, + ) break if all_identical: @@ -453,112 +444,146 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic "time_coord_mapping": time_coord_mapping, } - def combine_groups(self): - log_file = self.args.log_file - src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) - self.summary_fields = set() - self.combined_nc = xr.Dataset() - for group_file in group_files: - self.logger.info("Group file: %s", group_file.name) - # Open group file without decoding to have np.allclose work properly - with xr.open_dataset(group_file, decode_cf=False) as ds: - # Group name to prepend variable names is lowercase with underscores removed - group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() - time_info = self._consolidate_group_time_coords(ds, group_name) + def _add_time_coordinates_to_combined(self, time_info: dict, ds: xr.Dataset) -> None: + """Add time coordinates to the combined dataset.""" + if time_info["consolidated_time_name"]: + self._add_consolidated_time_coordinate(time_info) + else: + self._add_separate_time_coordinates(time_info, ds) - # Add time coordinate(s) to combined_nc - if time_info["consolidated_time_name"]: - self.logger.info( - "Adding consolidated time coordinate %-45s %s", - f"{time_info['consolidated_time_name']} as", - time_info["consolidated_time_name"], - ) - self.combined_nc[time_info["consolidated_time_name"]] = xr.DataArray( - time_info["consolidated_time_data"].to_numpy(), - dims=[time_info["consolidated_time_name"]], - coords={ - time_info["consolidated_time_name"]: time_info[ - "consolidated_time_data" - ].to_numpy() - }, - ) - self.combined_nc[time_info["consolidated_time_name"]].attrs = time_info[ - "consolidated_time_data" - ].attrs.copy() - else: - for orig_time_var, new_time_var in time_info["time_coord_mapping"].items(): - self.logger.info( - "Adding time coordinate %-58s %s", - f"{orig_time_var} as", - new_time_var, - ) - self.combined_nc[new_time_var] = xr.DataArray( - ds[orig_time_var].to_numpy(), - dims=[new_time_var], - coords={new_time_var: ds[orig_time_var].to_numpy()}, - ) - self.combined_nc[new_time_var].attrs = ds[orig_time_var].attrs.copy() - - for orig_var in ds.variables: - if orig_var.lower().endswith("time"): - continue - new_var = group_name + "_" + orig_var.lower() - dim_name = time_info["time_coord_mapping"][ds[orig_var].dims[0]] - self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) - if ( - orig_var in ("latitude", "longitude") - and ds[orig_var].attrs.get("units") == "radians" - ): - self.combined_nc[new_var] = xr.DataArray( - ds[orig_var].to_numpy() * 180.0 / np.pi, - dims=[dim_name], - coords={ - dim_name: ds[orig_var].get_index(orig_var + "_time").to_numpy() - }, - ) - self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() - self.combined_nc[new_var].attrs["units"] = "degrees" - else: - self.combined_nc[new_var] = xr.DataArray( - ds[orig_var].to_numpy(), - dims=[dim_name], - coords={ - dim_name: ds[orig_var].get_index(orig_var + "_time").to_numpy() - }, - ) - self.combined_nc[new_var].attrs = ds[orig_var].attrs.copy() + def _add_consolidated_time_coordinate(self, time_info: dict) -> None: + """Add a consolidated time coordinate to the combined dataset.""" + time_name = time_info["consolidated_time_name"] + self.logger.info( + "Adding consolidated time coordinate %-45s %s", + f"{time_name} as", + time_name, + ) + self.combined_nc[time_name] = xr.DataArray( + time_info["consolidated_time_data"].to_numpy(), + dims=[time_name], + coords={time_name: time_info["consolidated_time_data"].to_numpy()}, + ) + self.combined_nc[time_name].attrs = time_info["consolidated_time_data"].attrs.copy() - # Add metadata required for cf_xarray decoding - self.combined_nc[new_var].coords[dim_name].attrs["units"] = ( - "seconds since 1970-01-01T00:00:00Z" - ) - self.combined_nc[new_var].coords[dim_name].attrs["standard_name"] = "time" - - # Construct useful comment for consolidated time coordinate - if time_info["consolidated_time_name"] in self.combined_nc.variables: - mapping_info = ", ".join( - [ - f"{orig} -> {new}" - for orig, new in time_info["time_coord_mapping"].items() - ] - ) - self.combined_nc[time_info["consolidated_time_name"]].attrs["comment"] = ( - f"Consolidated time coordinate from: {mapping_info}" - ) + def _add_separate_time_coordinates(self, time_info: dict, ds: xr.Dataset) -> None: + """Add separate time coordinates to the combined dataset.""" + for orig_time_var, new_time_var in time_info["time_coord_mapping"].items(): + self.logger.info( + "Adding time coordinate %-58s %s", + f"{orig_time_var} as", + new_time_var, + ) + self.combined_nc[new_time_var] = xr.DataArray( + ds[orig_time_var].to_numpy(), + dims=[new_time_var], + coords={new_time_var: ds[orig_time_var].to_numpy()}, + ) + self.combined_nc[new_time_var].attrs = ds[orig_time_var].attrs.copy() + + def _get_time_coordinate_data(self, time_info: dict, ds: xr.Dataset, orig_time_dim: str): + """Get the appropriate time coordinate data for a variable.""" + if time_info["consolidated_time_name"]: + return time_info["consolidated_time_data"].to_numpy() + return ds[orig_time_dim].to_numpy() + + def _create_data_array_for_variable( + self, ds: xr.Dataset, orig_var: str, dim_name: str, time_coord_data + ) -> xr.DataArray: + """Create a DataArray for a variable, handling unit conversions.""" + if orig_var in ("latitude", "longitude") and ds[orig_var].attrs.get("units") == "radians": + data_array = xr.DataArray( + ds[orig_var].to_numpy() * 180.0 / np.pi, + dims=[dim_name], + coords={dim_name: time_coord_data}, + ) + data_array.attrs = ds[orig_var].attrs.copy() + data_array.attrs["units"] = "degrees" + else: + data_array = xr.DataArray( + ds[orig_var].to_numpy(), + dims=[dim_name], + coords={dim_name: time_coord_data}, + ) + data_array.attrs = ds[orig_var].attrs.copy() + return data_array - # Write out an intermediate netCDF file so that cf_xarray can decode - # the data properly for nudging positions - intermediate_file = self._intermediate_write_netcdf() - with xr.open_dataset(intermediate_file, decode_cf=True) as ds: - self.combined_nc = ds.load() + def _add_time_metadata_to_variable(self, var_name: str, dim_name: str) -> None: + """Add required time metadata for cf_xarray decoding.""" + self.combined_nc[var_name].coords[dim_name].attrs["units"] = ( + "seconds since 1970-01-01T00:00:00Z" + ) + self.combined_nc[var_name].coords[dim_name].attrs["standard_name"] = "time" + + def _process_group_variables(self, ds: xr.Dataset, group_name: str, time_info: dict) -> None: + """Process all data variables in a group.""" + for orig_var in ds.variables: + if orig_var.lower().endswith("time"): + continue + + # Skip scalar variables (no dimensions) + if len(ds[orig_var].dims) == 0: + self.logger.debug("Skipping scalar variable: %s", orig_var) + continue + + new_var = group_name + "_" + orig_var.lower() - # Add nudged longitude and latitude variables to the combined_nc dataset + # Get the original time dimension for this variable + orig_time_dim = ds[orig_var].dims[0] # Assuming first dim is time + + # Check if this dimension has a mapping + if orig_time_dim not in time_info["time_coord_mapping"]: + self.logger.warning( + "No time mapping found for %s dimension %s", orig_var, orig_time_dim + ) + continue + + dim_name = time_info["time_coord_mapping"][orig_time_dim] + time_coord_data = self._get_time_coordinate_data(time_info, ds, orig_time_dim) + + self.logger.info("Adding variable %-65s %s", f"{orig_var} as", new_var) + + # Create the data array + self.combined_nc[new_var] = self._create_data_array_for_variable( + ds, orig_var, dim_name, time_coord_data + ) + + # Add time metadata + self._add_time_metadata_to_variable(new_var, dim_name) + + def _add_consolidation_comment(self, time_info: dict) -> None: + """Add a comment documenting time coordinate consolidation.""" + if time_info["consolidated_time_name"] in self.combined_nc.variables: + mapping_info = ", ".join( + [f"{orig} -> {new}" for orig, new in time_info["time_coord_mapping"].items()] + ) + self.combined_nc[time_info["consolidated_time_name"]].attrs["comment"] = ( + f"Consolidated time coordinate from: {mapping_info}" + ) + + def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: + """Add nudged longitude and latitude variables to the combined dataset.""" try: - nudged_longitude, nudged_latitude = self._nudge_pos() + nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( + nav_longitude=self.combined_nc["universals_longitude"], + nav_latitude=self.combined_nc["universals_latitude"], + gps_longitude=self.combined_nc["nal9602_longitude_fix"], + gps_latitude=self.combined_nc["nal9602_latitude_fix"], + logger=self.logger, + auv_name="", + mission="", + max_sec_diff_at_end=max_sec_diff_at_end, + create_plots=True, + ) except ValueError as e: self.logger.error("Nudging positions failed: %s", e) # noqa: TRY400 return + + self.logger.info( + "nudge_positions created %d segments with segment_minsum = %f", + segment_count, + segment_minsum, + ) self.combined_nc["nudged_longitude"] = nudged_longitude self.combined_nc["nudged_longitude"].attrs = { "long_name": "Nudged Longitude", @@ -573,8 +598,42 @@ def combine_groups(self): "units": "degrees_north", "comment": "Dead reckoned latitude nudged to GPS positions", } - # Remove the intermediate file - Path(intermediate_file).unlink() + + def combine_groups(self): + """Combine group files into a single NetCDF dataset with consolidated time coordinates.""" + log_file = self.args.log_file + src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) + group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) + self.summary_fields = set() + self.combined_nc = xr.Dataset() + + for group_file in group_files: + self.logger.info("Group file: %s", group_file.name) + # Open group file without decoding to have np.allclose work properly + with xr.open_dataset(group_file, decode_cf=False) as ds: + # Group name to prepend variable names is lowercase with underscores removed + group_name = group_file.stem.split(f"{GROUP}_")[1].replace("_", "").lower() + time_info = self._consolidate_group_time_coords(ds, group_name) + + # Add time coordinate(s) to combined dataset + self._add_time_coordinates_to_combined(time_info, ds) + + # Process all data variables in the group + self._process_group_variables(ds, group_name, time_info) + + # Add consolidation comment if applicable + self._add_consolidation_comment(time_info) + + # Write intermediate file for cf_xarray decoding + intermediate_file = self._intermediate_write_netcdf() + with xr.open_dataset(intermediate_file, decode_cf=True) as ds: + self.combined_nc = ds.load() + + # Add nudged coordinates + self._add_nudged_coordinates() + + # Clean up intermediate file + ##Path(intermediate_file).unlink() def _intermediate_write_netcdf(self) -> None: """Write out an intermediate combined netCDF file so that data can be From 38be49be3b04f1e49ed0e286aff6ca9e78143e2e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 12 Nov 2025 13:33:45 -0800 Subject: [PATCH 056/121] Shift dead reckoned nav data by 1 for shared nudge_positions() to work. Use --plot option. --- src/data/combine.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 05a48307..0f04e03d 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -565,15 +565,16 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: """Add nudged longitude and latitude variables to the combined dataset.""" try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( - nav_longitude=self.combined_nc["universals_longitude"], - nav_latitude=self.combined_nc["universals_latitude"], + # For LRAUV data the nav positions are shifted by 1 to align with GPS fixes + nav_longitude=self.combined_nc["universals_longitude"].shift(universals_time=1), + nav_latitude=self.combined_nc["universals_latitude"].shift(universals_time=1), gps_longitude=self.combined_nc["nal9602_longitude_fix"], gps_latitude=self.combined_nc["nal9602_latitude_fix"], logger=self.logger, auv_name="", mission="", max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=True, + create_plots=self.args.plot, ) except ValueError as e: self.logger.error("Nudging positions failed: %s", e) # noqa: TRY400 @@ -691,12 +692,6 @@ def process_command_line(self): description=__doc__, epilog=examples, ) - - parser.add_argument( - "--noinput", - action="store_true", - help="Execute without asking for a response, e.g. to not ask to re-download file", - ) parser.add_argument( "--log_file", action="store", @@ -708,10 +703,8 @@ def process_command_line(self): ) parser.add_argument( "--plot", - action="store", - help="Create intermediate plots" - " to validate data operations. Use first to plot " - " points, e.g. first2000. Program blocks upon show.", + action="store_true", + help="Create intermediate plot(s) to help validate processing", ) parser.add_argument( "-v", From 3c77dfd26595571f007c5f8bfc5590822a2df4d0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 12 Nov 2025 16:47:41 -0800 Subject: [PATCH 057/121] WIP: Making work for data from *_combined.nc --- src/data/align.py | 289 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 273 insertions(+), 16 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index f8603424..bab78d52 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -14,6 +14,7 @@ import argparse import logging +import os import re import sys import time @@ -42,6 +43,10 @@ class InvalidCalFile(Exception): pass +class InvalidCombinedFile(Exception): + pass + + class Align_NetCDF: logger = logging.getLogger(__name__) _handler = logging.StreamHandler() @@ -53,6 +58,8 @@ def global_metadata(self): """Use instance variables to return a dictionary of metadata specific for the data that are written """ + # Try to get actual host name, fall back to container name + actual_hostname = os.getenv("HOST_NAME", gethostname()) repo = git.Repo(search_parent_directories=True) try: gitcommit = repo.head.object.hexsha @@ -94,17 +101,30 @@ def global_metadata(self): metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." metadata["history"] = f"Created by {self.commandline} on {iso_now}" - metadata["title"] = ( - f"Calibrated and aligned AUV sensor data from" - f" {self.args.auv_name} mission {self.args.mission}" - ) - from_data = "calibrated data" - metadata["source"] = ( - f"MBARI Dorado-class AUV data produced from {from_data}" - f" with execution of '{self.commandline}' at {iso_now} on" - f" host {gethostname()} using git commit {gitcommit} from" - f" software at 'https://github.com/mbari-org/auv-python'" - ) + if self.args.auv_name and self.args.mission: + metadata["title"] = ( + f"Calibrated and aligned AUV sensor data from" + f" {self.args.auv_name} mission {self.args.mission}" + ) + from_data = "calibrated data" + metadata["source"] = ( + f"MBARI Dorado-class AUV data produced from {from_data}" + f" with execution of '{self.commandline}' at {iso_now} on" + f" host {actual_hostname} using git commit {gitcommit} from" + f" software at 'https://github.com/mbari-org/auv-python'" + ) + else: + metadata["title"] = ( + f"Combined and aligned LRAUV instrument data from" + f" log file {Path(self.args.log_file).name}" + ) + from_data = "combined data" + metadata["source"] = ( + f"MBARI Long Range AUV data produced from {from_data}" + f" with execution of '{self.commandline}' at {iso_now} on" + f" host {actual_hostname} using git commit {gitcommit} from" + f" software at 'https://github.com/mbari-org/auv-python'" + ) metadata["summary"] = ( "Observational oceanographic data obtained from an Autonomous" " Underwater Vehicle mission with measurements at" @@ -115,7 +135,7 @@ def global_metadata(self): # Append location of original data files to summary matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", - self.calibrated_nc.attrs["summary"], + self.combined_nc.attrs["summary"], ) if matches: metadata["summary"] += " " + matches.group(1) @@ -334,6 +354,229 @@ def process_cal(self, vehicle: str = "", name: str = "", log_file: str = "") -> return netcdfs_dir + def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR0915 + """Process combined LRAUV data from *_combined.nc files created by combine.py""" + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(log_file).parent}") + src_file = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") + + self.combined_nc = xr.open_dataset(src_file) + self.logger.info("Processing %s", src_file) + self.aligned_nc = xr.Dataset() + self.min_time = datetime.now(UTC) + self.max_time = datetime(1970, 1, 1, tzinfo=UTC) + self.min_depth = np.inf + self.max_depth = -np.inf + self.min_lat = np.inf + self.max_lat = -np.inf + self.min_lon = np.inf + self.max_lon = -np.inf + + # Find navigation coordinates from combined data - must be from universals group + nav_coords = {} + for coord_type in ["longitude", "latitude", "depth", "time"]: + coord_var = f"universals_{coord_type}" + if coord_var not in self.combined_nc: + error_message = ( + f"Required universals coordinate {coord_var} not found in {src_file}" + ) + raise InvalidCombinedFile(error_message) + nav_coords[coord_type] = coord_var + self.logger.info("Found navigation coordinate: %s", coord_var) + + # Create interpolators for navigation coordinates + try: + lat_interp = interp1d( + self.combined_nc[nav_coords["latitude"]] + .get_index("universals_time") + .view(np.int64) + .tolist(), + self.combined_nc[nav_coords["latitude"]].values, + fill_value=( + self.combined_nc[nav_coords["latitude"]][0], + self.combined_nc[nav_coords["latitude"]][-1], + ), + bounds_error=False, + ) + + lon_interp = interp1d( + self.combined_nc[nav_coords["longitude"]] + .get_index("universals_time") + .view(np.int64) + .tolist(), + self.combined_nc[nav_coords["longitude"]].values, + fill_value=( + self.combined_nc[nav_coords["longitude"]][0], + self.combined_nc[nav_coords["longitude"]][-1], + ), + bounds_error=False, + ) + + depth_interp = interp1d( + self.combined_nc[nav_coords["depth"]] + .get_index("universals_time") + .view(np.int64) + .tolist(), + self.combined_nc[nav_coords["depth"]].values, + fill_value=( + self.combined_nc[nav_coords["depth"]][0], + self.combined_nc[nav_coords["depth"]][-1], + ), + bounds_error=False, + ) + + except KeyError as e: + error_message = f"Missing navigation data in {src_file}: {e}" + raise InvalidCombinedFile(error_message) from e + except ValueError as e: + error_message = f"Cannot interpolate navigation coordinates: {e}" + raise InvalidCombinedFile(error_message) from e + + # Process group-based variables (skip coordinate variables) + for variable in self.combined_nc: + # Skip time coordinate variables + if variable.endswith("_time"): + continue + + # Skip the navigation coordinate variables themselves + if variable in nav_coords.values(): + continue + + # Extract group name from variable (e.g., "ctd_seabird_salinity" -> "ctd_seabird") + var_parts = variable.split("_") + if len(var_parts) < 2: # noqa: PLR2004 + self.logger.debug("Skipping variable with unexpected name format: %s", variable) + continue + + # Try to find the corresponding time coordinate + # Look for pattern: group_name + "_time" + possible_time_coords = [] + for i in range(len(var_parts)): + group_candidate = "_".join(var_parts[: i + 1]) + time_coord_candidate = f"{group_candidate}_time" + if time_coord_candidate in self.combined_nc: + possible_time_coords.append((group_candidate, time_coord_candidate)) + + if not possible_time_coords: + self.logger.warning("No time coordinate found for variable: %s", variable) + continue + + # Use the longest matching group name (most specific) + group_name, timevar = max(possible_time_coords, key=lambda x: len(x[0])) + self.logger.debug( + "Processing %s with group %s and time %s", variable, group_name, timevar + ) + + # Copy the original variable + self.aligned_nc[variable] = self.combined_nc[variable] + + # Get the time index for this variable + var_time = self.aligned_nc[variable].get_index(timevar).view(np.int64).tolist() + + # Calculate sampling rate + sample_rate = np.round( + 1.0 / (np.mean(np.diff(self.combined_nc[timevar])) / np.timedelta64(1, "s")), + decimals=2, + ) + + # Create aligned variable with proper attributes + self.aligned_nc[variable] = xr.DataArray( + self.combined_nc[variable].values, + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=variable, + ) + self.aligned_nc[variable].attrs = self.combined_nc[variable].attrs + self.aligned_nc[variable].attrs["coordinates"] = ( + f"{group_name}_time {group_name}_depth {group_name}_latitude {group_name}_longitude" + ) + self.logger.info("%s: instrument_sample_rate_hz = %.2f", variable, sample_rate) + self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = sample_rate + + # Create interpolated coordinate variables for this group + coord_names = ["depth", "latitude", "longitude"] + coord_interps = [depth_interp, lat_interp, lon_interp] + coord_sources = [nav_coords["depth"], nav_coords["latitude"], nav_coords["longitude"]] + + for coord_name, coord_interp, coord_source in zip( + coord_names, coord_interps, coord_sources, strict=True + ): + coord_var_name = f"{group_name}_{coord_name}" + + self.aligned_nc[coord_var_name] = xr.DataArray( + coord_interp(var_time).astype(np.float64).tolist(), + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=coord_var_name, + ) + + # Copy attributes from source coordinate + if coord_source in self.combined_nc: + self.aligned_nc[coord_var_name].attrs = self.combined_nc[coord_source].attrs + + # Update attributes + self.aligned_nc[coord_var_name].attrs["long_name"] = coord_name.title() + self.aligned_nc[coord_var_name].attrs["instrument_sample_rate_hz"] = sample_rate + + if coord_name in ["latitude", "longitude"]: + self.aligned_nc[coord_var_name].attrs["comment"] = ( + self.aligned_nc[coord_var_name].attrs.get("comment", "") + + f". Variable {coord_source} from {src_file} file linearly" + f" interpolated onto {group_name} time values." + ) + + # Update spatial temporal bounds for global metadata + if pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC) < pd.to_datetime( + self.min_time + ): + self.min_time = pd.to_datetime(self.aligned_nc[timevar][0].values).tz_localize(UTC) + if pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize( + UTC + ) > pd.to_datetime(self.max_time): + self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC) + + # Update bounds using the interpolated coordinates + depth_coord = f"{group_name}_depth" + lat_coord = f"{group_name}_latitude" + lon_coord = f"{group_name}_longitude" + + if self.aligned_nc[depth_coord].min() < self.min_depth: + self.min_depth = self.aligned_nc[depth_coord].min().to_numpy() + if self.aligned_nc[depth_coord].max() > self.max_depth: + self.max_depth = self.aligned_nc[depth_coord].max().to_numpy() + if self.aligned_nc[lat_coord].min() < self.min_lat: + self.min_lat = self.aligned_nc[lat_coord].min().to_numpy() + if self.aligned_nc[lat_coord].max() > self.max_lat: + self.max_lat = self.aligned_nc[lat_coord].max().to_numpy() + if self.aligned_nc[lon_coord].min() < self.min_lon: + self.min_lon = self.aligned_nc[lon_coord].min().to_numpy() + if self.aligned_nc[lon_coord].max() > self.max_lon: + self.max_lon = self.aligned_nc[lon_coord].max().to_numpy() + + return netcdfs_dir + + def write_combined_netcdf( + self, netcdfs_dir, vehicle: str = "", name: str = "", log_file: str = "" + ) -> None: + """Write aligned combined data to NetCDF file""" + if log_file: + # For LRAUV log files, use the log file stem for output name + out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_align.nc") + else: + name = name or self.args.mission + vehicle = vehicle or self.args.auv_name + out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_align.nc") + + self.aligned_nc.attrs = self.global_metadata() + self.logger.info("Writing aligned combined data to %s", out_fn) + if out_fn.exists(): + self.logger.debug("Removing existing file %s", out_fn) + out_fn.unlink() + self.aligned_nc.to_netcdf(out_fn) + self.logger.info( + "Data variables written: %s", + ", ".join(sorted(self.aligned_nc.variables)), + ) + def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: name = name or self.args.mission vehicle = vehicle or self.args.auv_name @@ -354,6 +597,13 @@ def process_command_line(self): examples += " Align calibrated data for some missions:\n" examples += " " + sys.argv[0] + " --mission 2020.064.10\n" examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n" + examples += " Align combined LRAUV data:\n" + examples += ( + " " + + sys.argv[0] + + " --log_file brizo/missionlogs/2025/20250909_20250915/20250914T080941/" + + "202509140809_202509150109.nc4\n" + ) parser = argparse.ArgumentParser( formatter_class=RawTextHelpFormatter, @@ -415,10 +665,17 @@ def process_command_line(self): align_netcdf = Align_NetCDF() align_netcdf.process_command_line() p_start = time.time() - if align_netcdf.args.auv_name and align_netcdf.args.mission: + + if align_netcdf.args.log_file: + # Process combined LRAUV data using log_file + netcdf_dir = align_netcdf.process_combined(log_file=align_netcdf.args.log_file) + align_netcdf.write_combined_netcdf(netcdf_dir, log_file=align_netcdf.args.log_file) + elif align_netcdf.args.auv_name and align_netcdf.args.mission: + # Process calibrated data using auv_name and mission netcdf_dir = align_netcdf.process_cal() align_netcdf.write_netcdf(netcdf_dir) - elif align_netcdf.args.log_file: - netcdf_dir = align_netcdf.process_cal(log_file=align_netcdf.args.log_file) - align_netcdf.write_netcdf(netcdf_dir) + else: + align_netcdf.logger.error("Must provide either --log_file or both --auv_name and --mission") + sys.exit(1) + align_netcdf.logger.info("Time to process: %.2f seconds", (time.time() - p_start)) From a8eb98f1b0f2992b56340d7e23fb7188e3e6d336 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:44:21 -0800 Subject: [PATCH 058/121] Update numbers for tests to pass locally. --- src/data/test_process_dorado.py | 4 ++-- src/data/test_process_i2map.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 90ec047b..a729848e 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -33,7 +33,7 @@ def test_process_dorado(complete_dorado_processing): # update the expected size here. EXPECTED_SIZE_GITHUB = 621286 EXPECTED_SIZE_ACT = 621298 - EXPECTED_SIZE_LOCAL = 621286 + EXPECTED_SIZE_LOCAL = 621452 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB # noqa: S101 @@ -52,7 +52,7 @@ def test_process_dorado(complete_dorado_processing): # Check that the MD5 hash has not changed EXPECTED_MD5_GITHUB = "9f3f9e2e5abed08692ddb233dec0d0ac" EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" - EXPECTED_MD5_LOCAL = "6ecb2229b00835055619e982fe9d5023" + EXPECTED_MD5_LOCAL = "9137be5a2ed840cfca94a723285355ec" if str(proc.args.base_path).startswith("/home/runner"): # The MD5 hash is different in GitHub Actions, maybe due to different metadata assert hashlib.md5(open(nc_file, "rb").read()).hexdigest() == EXPECTED_MD5_GITHUB # noqa: PTH123, S101, S324, SIM115 diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index e2f6cb05..82fec722 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -32,7 +32,7 @@ def test_process_i2map(complete_i2map_processing): # update the expected size here. EXPECTED_SIZE_GITHUB = 58832 EXPECTED_SIZE_ACT = 58816 - EXPECTED_SIZE_LOCAL = 58884 + EXPECTED_SIZE_LOCAL = 59042 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB # noqa: S101 From 71445cecf0118e377491dfdd89be4e600e903795 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:45:04 -0800 Subject: [PATCH 059/121] Look for upstream summary metadata in the correct data member. --- src/data/align.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index bab78d52..7e69109a 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -113,7 +113,7 @@ def global_metadata(self): f" host {actual_hostname} using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) - else: + elif self.args.log_file: metadata["title"] = ( f"Combined and aligned LRAUV instrument data from" f" log file {Path(self.args.log_file).name}" @@ -133,10 +133,16 @@ def global_metadata(self): " software." ) # Append location of original data files to summary - matches = re.search( - "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", - self.combined_nc.attrs["summary"], - ) + if self.args.auv_name and self.args.mission: + matches = re.search( + "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", + self.calibrated_nc.attrs["summary"], + ) + elif self.args.log_file: + matches = re.search( + "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", + self.combined_nc.attrs["summary"], + ) if matches: metadata["summary"] += " " + matches.group(1) metadata["comment"] = ( From 794eed82ac96794329dff3726089182d60b5a00c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:45:43 -0800 Subject: [PATCH 060/121] Set create_plots to False - change back to True for debugging. --- src/data/calibrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 2cdc8941..c9e735f0 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -1676,7 +1676,7 @@ def _nudge_pos(self, max_sec_diff_at_end=10): auv_name=self.args.auv_name, mission=self.args.mission, max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=True, + create_plots=False, ) # Store results in instance variables for compatibility From d389dc147b82e79f89f523780d658d51ccfc0ff3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:46:37 -0800 Subject: [PATCH 061/121] Add setting of HOST_NAME environment variable for more meaningful metadata. --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 81417927..4861c407 100644 --- a/README.md +++ b/README.md @@ -103,11 +103,11 @@ First time use with Docker on a server using a service account: * git clone git@github.com:mbari-org/auv-python.git * cd auv-python * Create a .env file in `/opt/auv-python` with the following contents: - `M3_VOL=` - `AUVCTD_VOL=` - `CALIBRATION_VOL=` - `WORK_VOL=/data` - + `M3_VOL=` + `AUVCTD_VOL=` + `CALIBRATION_VOL=` + `WORK_VOL=/data` + `HOST_NAME=` After installation and when logging into the server again mission data can be processed thusly: * Setting up environment and printing help message: `sudo -u docker_user -i` From 8643e7df4f0f7701e8445a878e5c6c9c0f9e95c3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 11:50:52 -0800 Subject: [PATCH 062/121] Update numbers for tests to pass in Actions. --- src/data/test_process_dorado.py | 2 +- src/data/test_process_i2map.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index a729848e..56c73b58 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -31,7 +31,7 @@ def test_process_dorado(complete_dorado_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 621286 + EXPECTED_SIZE_GITHUB = 59042 EXPECTED_SIZE_ACT = 621298 EXPECTED_SIZE_LOCAL = 621452 if str(proc.args.base_path).startswith("/home/runner"): diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index 82fec722..66508695 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,7 +30,7 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 58832 + EXPECTED_SIZE_GITHUB = 58942 EXPECTED_SIZE_ACT = 58816 EXPECTED_SIZE_LOCAL = 59042 if str(proc.args.base_path).startswith("/home/runner"): From 2da3e11aa1aa5360c7b7b7942cf1bda8464f6373 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 12:00:07 -0800 Subject: [PATCH 063/121] Try again with EXPECTED_SIZE_GITHUB. --- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 56c73b58..ffd8cb58 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -31,7 +31,7 @@ def test_process_dorado(complete_dorado_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 59042 + EXPECTED_SIZE_GITHUB = 621404 EXPECTED_SIZE_ACT = 621298 EXPECTED_SIZE_LOCAL = 621452 if str(proc.args.base_path).startswith("/home/runner"): From fbe620a2decff5c163a4623488061e6c572e20bb Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 12:02:52 -0800 Subject: [PATCH 064/121] Update EXPECTED_MD5_GITHUB. --- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index ffd8cb58..d368b183 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "9f3f9e2e5abed08692ddb233dec0d0ac" + EXPECTED_MD5_GITHUB = "3bab0300e575c1d752a35f49e49e340e" EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" EXPECTED_MD5_LOCAL = "9137be5a2ed840cfca94a723285355ec" if str(proc.args.base_path).startswith("/home/runner"): From 97f46041c28c14c5dd41393991e061d9107a4dc4 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 13:28:25 -0800 Subject: [PATCH 065/121] Preparing for processing LRAUV data through the align step. --- .vscode/launch.json | 8 ++++++-- src/data/process.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 59e53006..4efd7108 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -60,6 +60,8 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"] // brizo 20250916T230652 has several ESP Samples from stoqs_lrauv_sep2025 "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn.nc4", "--plot_time", "/longitude_time"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn_latlon.nc4", "--plot_time", "/longitude_time"] }, { "name": "2.0 - calibrate.py", @@ -137,7 +139,8 @@ //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2004.236.00"], //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2008.289.03"], //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2023.192.01"], - "args": ["-v", "1", "--auv_name", "dorado", "--mission", "2024.317.01"], + //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2024.317.01"], + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] }, { "name": "3.1 - align.py for LRAUV --log_file", @@ -146,7 +149,8 @@ "program": "${workspaceFolder}/src/data/align.py", "console": "integratedTerminal", "justMyCode": false, - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"], + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"], + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] }, { "name": "4.0 - resample.py", diff --git a/src/data/process.py b/src/data/process.py index 6856f3db..a9e69c99 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -314,7 +314,7 @@ def align(self, mission: str = "", log_file: str = "") -> None: align_netcdf.commandline = self.commandline try: if log_file: - netcdf_dir = align_netcdf.process_cal(log_file=log_file) + netcdf_dir = align_netcdf.process_combined(log_file=log_file) else: netcdf_dir = align_netcdf.process_cal() align_netcdf.write_netcdf(netcdf_dir) From b2eefe04574165b72e204d485d973daa79aa7a2f Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 13 Nov 2025 14:34:28 -0800 Subject: [PATCH 066/121] WIP: Start with decoding the group name more simply for LRAUV data. --- src/data/align.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/data/align.py b/src/data/align.py index 7e69109a..703c0edb 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -447,7 +447,9 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 if variable in nav_coords.values(): continue - # Extract group name from variable (e.g., "ctd_seabird_salinity" -> "ctd_seabird") + # Extract group name from variable following convention for LRAUV data + # enforced in combine.py where first underscore separates group name + # from the rest of the variable name var_parts = variable.split("_") if len(var_parts) < 2: # noqa: PLR2004 self.logger.debug("Skipping variable with unexpected name format: %s", variable) From 4f394569d53703cd742cebee660cb36289acc327 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 12:55:14 -0800 Subject: [PATCH 067/121] Add nudged_ variables, add variable_time_coord_mapping to global metadata. Need variable_time_coord_mapping in metadata so that align.py can use it for setting coordinates attribute for each variable. --- src/data/combine.py | 52 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index 0f04e03d..a1f40b70 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -39,6 +39,7 @@ __copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" import argparse # noqa: I001 +import json import logging import sys import time @@ -89,6 +90,7 @@ class Combine_NetCDF: _handler.setFormatter(AUV_NetCDF._formatter) logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + variable_time_coord_mapping: dict = {} def global_metadata(self): """Use instance variables to return a dictionary of @@ -119,6 +121,7 @@ def global_metadata(self): metadata["license"] = metadata["distribution_statement"] metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." metadata["history"] = f"Created by {self.commandline} on {iso_now}" + metadata["variable_time_coord_mapping"] = json.dumps(self.variable_time_coord_mapping) log_file = self.args.log_file metadata["title"] = ( f"Combined LRAUV data from {log_file} - relevant variables extracted for STOQS" @@ -339,6 +342,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic - consolidated_time_name: name of consolidated coordinate (or None) - consolidated_time_data: the time coordinate data (or None) - time_coord_mapping: dict mapping original dims to consolidated dims + - variable_time_coord_mapping: dict mapping variables to their time coords """ # Find all time variables in this group time_vars = {var: ds[var] for var in ds.variables if var.lower().endswith("time")} @@ -348,6 +352,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic "consolidated_time_name": None, "consolidated_time_data": None, "time_coord_mapping": {}, + "variable_time_coord_mapping": {}, } if len(time_vars) == 1: @@ -360,10 +365,15 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic time_name, consolidated_name, ) + time_coord_mapping = {time_name: consolidated_name} return { "consolidated_time_name": consolidated_name, "consolidated_time_data": ds[time_name], - "time_coord_mapping": {time_name: consolidated_name}, + "time_coord_mapping": time_coord_mapping, + "variable_time_coord_mapping": { + f"{group_name}_{k.split('_time')[0].lower()}": v + for k, v in time_coord_mapping.items() + }, } # Multiple time coordinates - check if they're identical @@ -427,6 +437,10 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic "consolidated_time_name": consolidated_name, "consolidated_time_data": ds[first_time_name], "time_coord_mapping": time_coord_mapping, + "variable_time_coord_mapping": { + f"{group_name}_{k.split('_time')[0].lower()}": consolidated_name + for k in time_vars + }, } # Time coordinates differ - keep them separate @@ -442,6 +456,10 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic "consolidated_time_name": None, "consolidated_time_data": None, "time_coord_mapping": time_coord_mapping, + "variable_time_coord_mapping": { + f"{group_name}_{k.split('_time')[0].lower()}": v + for k, v in time_coord_mapping.items() + }, } def _add_time_coordinates_to_combined(self, time_info: dict, ds: xr.Dataset) -> None: @@ -499,6 +517,7 @@ def _create_data_array_for_variable( ) data_array.attrs = ds[orig_var].attrs.copy() data_array.attrs["units"] = "degrees" + data_array.attrs["coordinates"] = f"{dim_name}" else: data_array = xr.DataArray( ds[orig_var].to_numpy(), @@ -506,6 +525,8 @@ def _create_data_array_for_variable( coords={dim_name: time_coord_data}, ) data_array.attrs = ds[orig_var].attrs.copy() + data_array.attrs["comment"] = f"{orig_var} from group {ds.attrs.get('group_name', '')}" + data_array.attrs["coordinates"] = f"{dim_name}" return data_array def _add_time_metadata_to_variable(self, var_name: str, dim_name: str) -> None: @@ -585,19 +606,35 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: segment_count, segment_minsum, ) - self.combined_nc["nudged_longitude"] = nudged_longitude + self.combined_nc["nudged_longitude"] = xr.DataArray( + nudged_longitude, + coords=[self.combined_nc["universals_time"].to_numpy()], + dims={f"nudged_{TIME}"}, + name="nudged_longitude", + ) self.combined_nc["nudged_longitude"].attrs = { "long_name": "Nudged Longitude", "standard_name": "longitude", "units": "degrees_east", - "comment": "Dead reckoned longitude nudged to GPS positions", + "comment": ( + f"Dead reckoned positions from {segment_count} underwater segments " + f"nudged to GPS positions" + ), } - self.combined_nc["nudged_latitude"] = nudged_latitude + self.combined_nc["nudged_latitude"] = xr.DataArray( + nudged_latitude, + coords=[self.combined_nc["universals_time"].to_numpy()], + dims={f"nudged_{TIME}"}, + name="nudged_latitude", + ) self.combined_nc["nudged_latitude"].attrs = { "long_name": "Nudged Latitude", "standard_name": "latitude", "units": "degrees_north", - "comment": "Dead reckoned latitude nudged to GPS positions", + "comment": ( + f"Dead reckoned positions from {segment_count} underwater segments " + f"nudged to GPS positions" + ), } def combine_groups(self): @@ -625,6 +662,9 @@ def combine_groups(self): # Add consolidation comment if applicable self._add_consolidation_comment(time_info) + # Collect variable coordinate mapping by group, which can be flattened + self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"]) + # Write intermediate file for cf_xarray decoding intermediate_file = self._intermediate_write_netcdf() with xr.open_dataset(intermediate_file, decode_cf=True) as ds: @@ -634,7 +674,7 @@ def combine_groups(self): self._add_nudged_coordinates() # Clean up intermediate file - ##Path(intermediate_file).unlink() + Path(intermediate_file).unlink() def _intermediate_write_netcdf(self) -> None: """Write out an intermediate combined netCDF file so that data can be From 63af3a1775cfe38519e8265339a1c06338cf0571 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 12:55:43 -0800 Subject: [PATCH 068/121] Add group_name to the global metadata. --- src/data/nc42netcdfs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index ed778f82..f4f5f51d 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -1021,6 +1021,7 @@ def global_metadata(self, log_file: str, group_name: str): f" using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) + metadata["group_name"] = group_name metadata["summary"] = ( "Observational oceanographic data obtained from a Long Range Autonomous" " Underwater Vehicle mission with measurements at original sampling" From a6880c0260147811caf3a209dc1ae4a0be814771 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 16:49:16 -0800 Subject: [PATCH 069/121] Fixup metadata - use mapping from global variable_time_coord_mapping attribute for coordinates. --- src/data/align.py | 154 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 116 insertions(+), 38 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 703c0edb..81cf28fe 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -13,6 +13,7 @@ __copyright__ = "Copyright 2021, Monterey Bay Aquarium Research Institute" import argparse +import json import logging import os import re @@ -113,10 +114,17 @@ def global_metadata(self): f" host {actual_hostname} using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) + metadata["summary"] = ( + "Observational oceanographic data obtained from an Autonomous" + " Underwater Vehicle mission with measurements at" + " original sampling intervals. The data have been calibrated" + " and the coordinate variables aligned using MBARI's auv-python" + " software." + ) elif self.args.log_file: metadata["title"] = ( f"Combined and aligned LRAUV instrument data from" - f" log file {Path(self.args.log_file).name}" + f" log file {Path(self.args.log_file)}" ) from_data = "combined data" metadata["source"] = ( @@ -125,32 +133,38 @@ def global_metadata(self): f" host {actual_hostname} using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) - metadata["summary"] = ( - "Observational oceanographic data obtained from an Autonomous" - " Underwater Vehicle mission with measurements at" - " original sampling intervals. The data have been calibrated" - " and the coordinate variables aligned using MBARI's auv-python" - " software." - ) + metadata["summary"] = ( + "Observational oceanographic data obtained from an Autonomous" + " Underwater Vehicle mission with measurements at" + " original sampling intervals. The position variables have been" + " corrected to GPS positions and aligned with the data variables" + " using MBARI's auv-python software." + ) # Append location of original data files to summary if self.args.auv_name and self.args.mission: matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.calibrated_nc.attrs["summary"], ) + metadata["comment"] = ( + f"MBARI Dorado-class AUV data produced from calibrated data" + f" with execution of '{self.commandline}' at {iso_now} on" + f" host {gethostname()}. Software available at" + f" 'https://github.com/mbari-org/auv-python'" + ) elif self.args.log_file: matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.combined_nc.attrs["summary"], ) + metadata["comment"] = ( + f"MBARI LRAUV-class AUV data produced from logged data" + f" with execution of '{self.commandline}' at {iso_now} on" + f" host {gethostname()}. Software available at" + f" 'https://github.com/mbari-org/auv-python'" + ) if matches: metadata["summary"] += " " + matches.group(1) - metadata["comment"] = ( - f"MBARI Dorado-class AUV data produced from calibrated data" - f" with execution of '{self.commandline}' at {iso_now} on" - f" host {gethostname()}. Software available at" - f" 'https://github.com/mbari-org/auv-python'" - ) return metadata @@ -474,11 +488,8 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 "Processing %s with group %s and time %s", variable, group_name, timevar ) - # Copy the original variable - self.aligned_nc[variable] = self.combined_nc[variable] - # Get the time index for this variable - var_time = self.aligned_nc[variable].get_index(timevar).view(np.int64).tolist() + var_time = self.combined_nc[variable].get_index(timevar).view(np.int64).tolist() # Calculate sampling rate sample_rate = np.round( @@ -486,20 +497,6 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 decimals=2, ) - # Create aligned variable with proper attributes - self.aligned_nc[variable] = xr.DataArray( - self.combined_nc[variable].values, - dims={timevar}, - coords=[self.combined_nc[variable].get_index(timevar)], - name=variable, - ) - self.aligned_nc[variable].attrs = self.combined_nc[variable].attrs - self.aligned_nc[variable].attrs["coordinates"] = ( - f"{group_name}_time {group_name}_depth {group_name}_latitude {group_name}_longitude" - ) - self.logger.info("%s: instrument_sample_rate_hz = %.2f", variable, sample_rate) - self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = sample_rate - # Create interpolated coordinate variables for this group coord_names = ["depth", "latitude", "longitude"] coord_interps = [depth_interp, lat_interp, lon_interp] @@ -525,7 +522,7 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 self.aligned_nc[coord_var_name].attrs["long_name"] = coord_name.title() self.aligned_nc[coord_var_name].attrs["instrument_sample_rate_hz"] = sample_rate - if coord_name in ["latitude", "longitude"]: + if coord_name in ["longitude", "latitude", "depth"]: self.aligned_nc[coord_var_name].attrs["comment"] = ( self.aligned_nc[coord_var_name].attrs.get("comment", "") + f". Variable {coord_source} from {src_file} file linearly" @@ -542,11 +539,69 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 ) > pd.to_datetime(self.max_time): self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC) - # Update bounds using the interpolated coordinates - depth_coord = f"{group_name}_depth" - lat_coord = f"{group_name}_latitude" - lon_coord = f"{group_name}_longitude" + # Coordinates - use mapping from global variable_time_coord_mapping attribute + variable_time_coord_mapping = json.loads( + self.combined_nc.attrs.get("variable_time_coord_mapping", "{}") + ) + time_coord = variable_time_coord_mapping.get(variable) + depth_coord = ( + time_coord[:-5] + "_depth" + if time_coord and time_coord.endswith("_time") + else f"{group_name}_depth" + ) + lat_coord = ( + time_coord[:-5] + "_latitude" + if time_coord and time_coord.endswith("_time") + else f"{group_name}_latitude" + ) + lon_coord = ( + time_coord[:-5] + "_longitude" + if time_coord and time_coord.endswith("_time") + else f"{group_name}_longitude" + ) + + # Add interpolated depth, latitude, and longitude variables + if depth_coord in self.combined_nc: + self.aligned_nc[depth_coord].attrs = self.combined_nc[depth_coord].attrs + self.aligned_nc[depth_coord] = xr.DataArray( + depth_interp(var_time).astype(np.float64).tolist(), + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=depth_coord, + ) + self.aligned_nc[depth_coord].attrs["long_name"] = "Depth" + self.aligned_nc[depth_coord].attrs["comment"] = "depth from Group_Universals.nc" + self.aligned_nc[depth_coord].attrs["instrument_sample_rate_hz"] = sample_rate + self.aligned_nc[lat_coord] = xr.DataArray( + lat_interp(var_time).astype(np.float64).tolist(), + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=lat_coord, + ) + self.aligned_nc[lat_coord].attrs = self.combined_nc["nudged_latitude"].attrs + self.aligned_nc[lat_coord].attrs["comment"] += ( + f". Variable nudged_latitude from {src_file} file linearly" + f" interpolated onto {variable.split('_')[0]} time values." + ) + self.aligned_nc[lat_coord].attrs["long_name"] = "Latitude" + self.aligned_nc[lat_coord].attrs["instrument_sample_rate_hz"] = sample_rate + + self.aligned_nc[lon_coord] = xr.DataArray( + lon_interp(var_time).astype(np.float64).tolist(), + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=lon_coord, + ) + self.aligned_nc[lon_coord].attrs = self.combined_nc["nudged_longitude"].attrs + self.aligned_nc[lon_coord].attrs["comment"] += ( + f". Variable nudged_longitude from {src_file} file linearly" + f" interpolated onto {variable.split('_')[0]} time values." + ) + self.aligned_nc[lon_coord].attrs["long_name"] = "Longitude" + self.aligned_nc[lon_coord].attrs["instrument_sample_rate_hz"] = sample_rate + + # Update bounds using the interpolated coordinates if self.aligned_nc[depth_coord].min() < self.min_depth: self.min_depth = self.aligned_nc[depth_coord].min().to_numpy() if self.aligned_nc[depth_coord].max() > self.max_depth: @@ -560,6 +615,29 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 if self.aligned_nc[lon_coord].max() > self.max_lon: self.max_lon = self.aligned_nc[lon_coord].max().to_numpy() + # Create aligned variable with proper attributes + self.aligned_nc[variable] = xr.DataArray( + self.combined_nc[variable].values, + dims={timevar}, + coords=[self.combined_nc[variable].get_index(timevar)], + name=variable, + ) + self.aligned_nc[variable].attrs = self.combined_nc[variable].attrs + if ( + time_coord in self.aligned_nc + and depth_coord in self.aligned_nc + and lat_coord in self.aligned_nc + and lon_coord in self.aligned_nc + ): + self.aligned_nc[variable].attrs["coordinates"] = ( + f"{time_coord} {depth_coord} {lat_coord} {lon_coord}" + ) + else: + self.logger.info("Skipping setting coordinates attribute for %s", variable) + + self.logger.info("%s: instrument_sample_rate_hz = %.2f", variable, sample_rate) + self.aligned_nc[variable].attrs["instrument_sample_rate_hz"] = sample_rate + return netcdfs_dir def write_combined_netcdf( @@ -595,7 +673,7 @@ def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: self.logger.debug("Removing file %s", out_fn) out_fn.unlink() self.aligned_nc.to_netcdf(out_fn) - self.logger.info( + self.logger.debug( "Data variables written: %s", ", ".join(sorted(self.aligned_nc.variables)), ) From 5b05b0dc69b23af44303f15fcbc16c2975a6d727 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 16:49:37 -0800 Subject: [PATCH 070/121] Testing process_lrauv. --- .vscode/launch.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ee820b82..64c6b5d0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -175,13 +175,14 @@ //"args": ["--auv_name", "dorado", "--mission", "2017.044.00", "-v", "1"] //"args": ["--auv_name", "dorado", "--mission", "2021.102.02", "-v", "1"] //"args": ["--auv_name", "dorado", "--mission", "2004.236.00", "-v", "1"] - "args": ["--auv_name", "dorado", "--mission", "2023.192.01", "-v", "1"] + //"args": ["--auv_name", "dorado", "--mission", "2023.192.01", "-v", "1"] //"args": ["--auv_name", "i2map", "--mission", "2019.157.02", "-v", "2", "--plot", "--plot_seconds", "82000"], //"args": ["--auv_name", "dorado", "--mission", "2021.102.02", "-v", "1", "--flash_threshold", "1.5e10"], //"args": ["--auv_name", "dorado", "--mission", "2024.317.01", "-v", "1"], //"args": ["--auv_name", "dorado", "--mission", "2010.341.00", "-v", "1", "--plot", "--plot_seconds", "82000"], //"args": ["--auv_name", "dorado", "--mission", "2020.337.00", "-v", "1"], //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"], + "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] }, { "name": "5.0 - archive.py", @@ -333,7 +334,7 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] - "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] }, From e7ef537d8e7f00f80506c3fe258fa35a66d39b09 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 16:50:14 -0800 Subject: [PATCH 071/121] Get working for process_lrauv. --- src/data/process.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/data/process.py b/src/data/process.py index b3a6e2e1..5c0d83a4 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -308,6 +308,7 @@ def align(self, mission: str = "", log_file: str = "") -> None: align_netcdf.args.base_path = self.args.base_path align_netcdf.args.auv_name = self.vehicle align_netcdf.args.mission = mission + align_netcdf.args.log_file = self.args.log_file align_netcdf.args.plot = None align_netcdf.args.verbose = self.args.verbose align_netcdf.logger.setLevel(self._log_levels[self.args.verbose]) @@ -316,9 +317,10 @@ def align(self, mission: str = "", log_file: str = "") -> None: try: if log_file: netcdf_dir = align_netcdf.process_combined(log_file=log_file) + align_netcdf.write_combined_netcdf(netcdf_dir, log_file=log_file) else: netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_netcdf(netcdf_dir) + align_netcdf.write_netcdf(netcdf_dir, vehicle=self.vehicle, mission=mission) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 error_message = f"{mission} {e}" @@ -326,12 +328,13 @@ def align(self, mission: str = "", log_file: str = "") -> None: finally: align_netcdf.logger.removeHandler(self.log_handler) - def resample(self, mission: str) -> None: + def resample(self, mission: str = "") -> None: self.logger.info("Resampling steps for %s", mission) resamp = Resampler() resamp.args = argparse.Namespace() resamp.args.auv_name = self.vehicle resamp.args.mission = mission + resamp.args.log_file = self.args.log_file resamp.args.plot = None resamp.args.freq = self.args.freq resamp.args.mf_width = self.args.mf_width @@ -341,13 +344,17 @@ def resample(self, mission: str) -> None: resamp.logger.setLevel(self._log_levels[self.args.verbose]) resamp.logger.addHandler(self.log_handler) file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" - nc_file = Path( - self.args.base_path, - resamp.args.auv_name, - MISSIONNETCDFS, - resamp.args.mission, - file_name, - ) + if resamp.args.log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(resamp.args.log_file).parent) + nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc") + else: + nc_file = Path( + self.args.base_path, + resamp.args.auv_name, + MISSIONNETCDFS, + resamp.args.mission, + file_name, + ) if self.args.flash_threshold and self.args.resample: self.logger.info( "Executing only resample step to produce netCDF file with flash_threshold = %s", @@ -373,7 +380,7 @@ def resample(self, mission: str) -> None: try: resamp.resample_mission(nc_file) except FileNotFoundError as e: - self.logger.error("%s %s", mission, e) # noqa: TRY400 + self.logger.error("%s %s", nc_file, e) # noqa: TRY400 finally: resamp.logger.removeHandler(self.log_handler) @@ -763,6 +770,7 @@ def combine(self, log_file: str) -> None: ) combine = Combine_NetCDF() combine.args = argparse.Namespace() + combine.args.plot = None combine.args.verbose = self.args.verbose combine.args.log_file = self.args.log_file combine.commandline = self.commandline @@ -790,7 +798,7 @@ def process_log_file(self, log_file: str) -> None: netcdfs_dir = self.extract(log_file) self.combine(log_file=log_file) self.align(log_file=log_file) - # self.resample(log_file) + self.resample() # self.create_products(log_file) self.logger.info("Finished processing log file: %s", log_file) From be840a5a6e437662fbbf66e0b0a81a9708803c59 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 16:51:44 -0800 Subject: [PATCH 072/121] Modify for reuse with LRAUV _align.nc data. --- src/data/resample.py | 63 +++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/src/data/resample.py b/src/data/resample.py index 34653107..53bbd39c 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -27,6 +27,7 @@ import xarray as xr from dorado_info import dorado_info from logs2netcdfs import BASE_PATH, MISSIONNETCDFS, SUMMARY_SOURCE, TIME, AUV_NetCDF +from nc42netcdfs import BASE_LRAUV_PATH from pysolar.solar import get_altitude from scipy import signal @@ -1006,7 +1007,8 @@ def resample_variable( # noqa: PLR0913 instrs_to_pad: dict[str, timedelta], depth_threshold: float, ) -> None: - timevar = f"{instr}_{TIME}" + # Get the time variable name from the dimension of the variable + timevar = self.ds[variable].dims[0] if instr == "biolume" and variable == "biolume_raw": # Only biolume_avg_biolume and biolume_flow treated like other data # All other biolume variables in self.df_r[] are computed from biolume_raw @@ -1135,16 +1137,24 @@ def get_mission_start_end( mission_start = datetime.max # noqa: DTZ901 mission_end = datetime.min # noqa: DTZ901 instrs_to_pad = {} + self.logger.info("Determining mission start and end times") + time_coords = [] for instr in self.instruments_variables(nc_file): time_coord = f"{instr}_{TIME}" - mission_start = min(pd.to_datetime(self.ds[time_coord].min().values), mission_start) - mission_end = max(pd.to_datetime(self.ds[time_coord].max().values), mission_end) - for instr in self.instruments_variables(nc_file): - time_coord = f"{instr}_{TIME}" + try: + mission_start = min(pd.to_datetime(self.ds[time_coord].min().values), mission_start) + mission_end = max(pd.to_datetime(self.ds[time_coord].max().values), mission_end) + time_coords.append(time_coord) + except KeyError: + # Likely an LRAUV _combined.nc file with multiple different dimensions in a Group + self.logger.info( + "Ignoring expected time_coord that could not be found: %s", time_coord + ) + for time_coord in time_coords: duration = mission_end - pd.to_datetime(self.ds[time_coord].max().values) self.logger.info( "%-10s: %s to %s (%s before mission_end)", - instr, + time_coord.split("_")[0], self.ds[time_coord].min().values, self.ds[time_coord].max().values, duration, @@ -1152,10 +1162,10 @@ def get_mission_start_end( if mission_end - pd.to_datetime( self.ds[time_coord].max().values, ) > timedelta(minutes=min_crit): - instrs_to_pad[instr] = duration + instrs_to_pad[time_coord.split("_")[0]] = duration self.logger.warning( "Instrument %s has a gap > %d minutes at the end of the mission: %s", - instr, + time_coord.split("_")[0], min_crit, mission_end - pd.to_datetime(self.ds[time_coord].max().values), ) @@ -1199,8 +1209,10 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 # Use the pitch corrected depth coordinate for 'ctd1' for dorado, # 'seabird25p' for i2map. The depth coordinate for pitch_corrected_instr # must be as complete as possible as it's used for all the other - # nosecone instruments. - pitch_corrected_instr = "ctd1" + # nosecone instruments. If we are processing LRAUV data then + # use 'ctddseabird', otherwise start with 'ctd1' and fall back to + # 'seabird25p' if needed for i2map missions. + pitch_corrected_instr = "ctdseabird" if self.args.log_file else "ctd1" if f"{pitch_corrected_instr}_depth" not in self.ds: pitch_corrected_instr = "seabird25p" if pitch_corrected_instr in instrs_to_pad: @@ -1322,6 +1334,15 @@ def process_command_line(self): help="Mission directory, e.g.: 2020.064.10", ), ) + parser.add_argument( + "--log_file", + action="store", + help=( + "Path to the log file of original LRAUV data, e.g.: " + "brizo/missionlogs/2025/20250903_20250909/" + "20250905T072042/202509050720_202509051653.nc4" + ), + ) parser.add_argument("--plot", action="store_true", help="Plot data") parser.add_argument( "--plot_seconds", @@ -1374,15 +1395,21 @@ def process_command_line(self): if __name__ == "__main__": resamp = Resampler() resamp.process_command_line() - file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" - nc_file = Path( - BASE_PATH, - resamp.args.auv_name, - MISSIONNETCDFS, - resamp.args.mission, - file_name, - ) + if resamp.args.log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(resamp.args.log_file).parent}") + nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc") + else: + file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" + nc_file = Path( + BASE_PATH, + resamp.args.auv_name, + MISSIONNETCDFS, + resamp.args.mission, + file_name, + ) p_start = time.time() + # Everything that Resampler needs should be in the self described nc_file + # whether it is Dorado/i2MAP or LRAUV resamp.resample_mission( nc_file, mf_width=resamp.args.mf_width, From 10a3c80e824414d940a4eebf54ede3413cfeaa83 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 18 Nov 2025 17:09:33 -0800 Subject: [PATCH 073/121] Fix for the tests. --- src/data/conftest.py | 2 ++ src/data/process.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/data/conftest.py b/src/data/conftest.py index 4f08da02..fd181ce1 100644 --- a/src/data/conftest.py +++ b/src/data/conftest.py @@ -103,6 +103,7 @@ def complete_dorado_processing(): ns.skip_download_process = False ns.num_cores = 1 ns.add_seconds = None + ns.log_file = None ns.verbose = 1 proc.args = ns proc.process_missions(TEST_START_YEAR) @@ -149,6 +150,7 @@ def complete_i2map_processing(): ns.last_n_days = 0 ns.num_cores = 1 ns.add_seconds = None + ns.log_file = None ns.verbose = 1 proc.args = ns proc.process_missions(TEST_START_YEAR) diff --git a/src/data/process.py b/src/data/process.py index 5c0d83a4..cc0a190f 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -320,7 +320,7 @@ def align(self, mission: str = "", log_file: str = "") -> None: align_netcdf.write_combined_netcdf(netcdf_dir, log_file=log_file) else: netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_netcdf(netcdf_dir, vehicle=self.vehicle, mission=mission) + align_netcdf.write_combined_netcdf(netcdf_dir, vehicle=self.vehicle) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 error_message = f"{mission} {e}" From 09f3a8525f4d61b88d64096ff0e2d6fc45fb0170 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 19 Nov 2025 12:29:00 -0800 Subject: [PATCH 074/121] Remove coordinate standard_name attributes that don't belong in this final file. --- src/data/resample.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/data/resample.py b/src/data/resample.py index 53bbd39c..bf1b9623 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -78,6 +78,17 @@ def _build_global_metadata(self) -> None: ) gitcommit = "" iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z" + + # Ensure that only the latitude and longitude variables have + # standard_name attributes equal to "latitude" and "longitude" so that + # the .cf[] accessor works correctly + for var in self.resampled_nc.data_vars: + if self.resampled_nc[var].attrs.get("standard_name") in ["latitude", "longitude"]: + if var in {"latitude", "longitude"}: + continue + self.logger.info("Removing standard_name attribute from variable %s", var) + del self.resampled_nc[var].attrs["standard_name"] + # Common dynamic attributes for all auv platforms self.metadata["time_coverage_start"] = str(min(self.resampled_nc.time.values)) self.metadata["time_coverage_end"] = str(max(self.resampled_nc.time.values)) @@ -1050,7 +1061,7 @@ def resample_variable( # noqa: PLR0913 .resample(freq.lower()) .mean() ) - self.df_r[variable].loc[instr_data.index] = instr_data + self.df_r.loc[instr_data.index, variable] = instr_data else: self.df_r[variable] = ( self.df_o[f"{variable}_mf"] From eceaf3dcf8a0d26c8b3486723150244831e26586 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 19 Nov 2025 12:31:18 -0800 Subject: [PATCH 075/121] Miscellaneous fixes for LRAUV archive step. --- .vscode/launch.json | 3 ++- src/data/AUV.py | 2 +- src/data/archive.py | 17 +++++++++++++++-- src/data/process.py | 2 +- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 64c6b5d0..48d7a853 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -334,7 +334,8 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] }, diff --git a/src/data/AUV.py b/src/data/AUV.py index 8dd913e1..cffa6fe8 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -256,7 +256,7 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 logger.info( - f"{seg_count + 1:4d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 + f"{seg_count + 1:5d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 ) segment_count = seg_count segment_minsum = seg_minsum diff --git a/src/data/archive.py b/src/data/archive.py index 2352d5bf..0593923a 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -172,7 +172,7 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: def copy_to_M3(self, resampled_nc_file: str) -> None: pass - def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: + def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, PLR0912 "Copy the intermediate and resampled netCDF file(s) to the archive LRAUV location" src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) dst_dir = Path(LRAUV_VOL, Path(log_file).parent) @@ -196,7 +196,7 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: "%-75s exists, but is not being archived because --clobber is not specified.", src_file.name, ) - for ftype in (f"{freq}.nc", "cal.nc", "align.nc"): + for ftype in (f"{freq}.nc", "combined.nc", "align.nc"): src_file = Path(src_dir, f"{Path(log_file).stem}_{ftype}") dst_file = Path(dst_dir, src_file.name) if self.args.clobber: @@ -211,6 +211,19 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: "%-36s exists, but is not being archived because --clobber is not specified.", # noqa: E501 src_file.name, ) + # Copy the processing.log file last so that we get everything + src_file = Path(src_dir, f"{Path(log_file).stem}_{LOG_NAME}") + dst_file = Path(dst_dir, src_file.name) + if src_file.exists(): + if self.args.clobber: + self.logger.info("copyfile %s %s", src_file, dst_dir) + shutil.copyfile(src_file, dst_file) + self.logger.info("copyfile %s %s done.", src_file, dst_dir) + else: + self.logger.info( + "%26s exists, but is not being archived because --clobber is not specified.", # noqa: E501 + src_file.name, + ) def process_command_line(self): parser = argparse.ArgumentParser( diff --git a/src/data/process.py b/src/data/process.py index cc0a190f..949b366f 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -785,7 +785,7 @@ def process_log_file(self, log_file: str) -> None: netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) Path(netcdfs_dir).mkdir(parents=True, exist_ok=True) self.log_handler = logging.FileHandler( - Path(BASE_LRAUV_PATH, f"{log_file}_extract.log"), mode="w+" + Path(netcdfs_dir, f"{Path(log_file).stem}_processing.log"), mode="w+" ) self.log_handler.setLevel(self._log_levels[self.args.verbose]) self.log_handler.setFormatter(AUV_NetCDF._formatter) From 1e605a8a83c3fd9a1e91194ad49437043d14d6bf Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 19 Nov 2025 14:46:49 -0800 Subject: [PATCH 076/121] Replace "vehicle" with "auv_name" so that auv_name is used consistently. --- src/data/process.py | 58 +++++++++++++++++------------------ src/data/process_Dorado389.py | 4 +-- src/data/process_dorado.py | 4 +-- src/data/process_i2map.py | 4 +-- src/data/process_lrauv.py | 4 +-- 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/data/process.py b/src/data/process.py index 949b366f..e059137f 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -132,12 +132,12 @@ class Processor: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def __init__(self, vehicle, vehicle_dir, mount_dir, calibration_dir) -> None: + def __init__(self, auv_name, vehicle_dir, mount_dir, calibration_dir) -> None: # Variables to be set by subclasses, e.g.: - # vehicle = "i2map" + # auv_name = "i2map" # vehicle_dir = "/Volumes/M3/master/i2MAP" # mount_dir = "smb://thalassa.shore.mbari.org/M3" - self.vehicle = vehicle + self.auv_name = auv_name self.vehicle_dir = vehicle_dir self.mount_dir = mount_dir self.calibration_dir = calibration_dir @@ -187,14 +187,14 @@ def get_mission_dir(self, mission: str) -> str: self.logger.error("%s does not exist.", self.vehicle_dir) self.logger.info("Is %s mounted?", self.mount_dir) sys.exit(1) - if self.vehicle.lower() == "dorado" or self.vehicle == "Dorado389": + if self.auv_name.lower() == "dorado" or self.auv_name == "Dorado389": if self.args.local: path = Path(self.vehicle_dir, mission) else: year = mission.split(".")[0] yearyd = "".join(mission.split(".")[:2]) path = Path(self.vehicle_dir, year, yearyd, mission) - elif self.vehicle.lower() == "i2map": + elif self.auv_name.lower() == "i2map": year = int(mission.split(".")[0]) # Could construct the YYYY/MM/YYYYMMDD path on M3/Master # but use the mission_list() method to find the mission dir instead @@ -205,8 +205,8 @@ def get_mission_dir(self, mission: str) -> str: self.logger.error("Cannot find %s in %s", mission, self.vehicle_dir) error_message = f"Cannot find {mission} in {self.vehicle_dir}" raise FileNotFoundError(error_message) - elif self.vehicle == "Dorado389": - # The Dorado389 vehicle is a special case used for testing locally and in CI + elif self.auv_name == "Dorado389": + # The Dorado389 auv_name is a special case used for testing locally and in CI path = self.vehicle_dir if not Path(path).exists(): self.logger.error("%s does not exist.", path) @@ -223,7 +223,7 @@ def download_process(self, mission: str, src_dir: str) -> None: auv_netcdf.args.noinput = self.args.noinput auv_netcdf.args.clobber = self.args.clobber auv_netcdf.args.noreprocess = self.args.noreprocess - auv_netcdf.args.auv_name = self.vehicle + auv_netcdf.args.auv_name = self.auv_name auv_netcdf.args.mission = mission auv_netcdf.args.use_portal = self.args.use_portal auv_netcdf.args.add_seconds = self.args.add_seconds @@ -238,7 +238,7 @@ def download_process(self, mission: str, src_dir: str) -> None: # Run lopcToNetCDF.py - mimic log message from logs2netcdfs.py lopc_bin = Path( self.args.base_path, - self.vehicle, + self.auv_name, MISSIONLOGS, mission, "lopc.bin", @@ -246,7 +246,7 @@ def download_process(self, mission: str, src_dir: str) -> None: try: file_size = Path(lopc_bin).stat().st_size except FileNotFoundError: - if "lopc" in EXPECTED_SENSORS[self.vehicle]: + if "lopc" in EXPECTED_SENSORS[self.auv_name]: self.logger.warning("No lopc.bin file for %s", mission) return self.logger.info("Processing file %s (%d bytes)", lopc_bin, file_size) @@ -255,7 +255,7 @@ def download_process(self, mission: str, src_dir: str) -> None: lopc_processor.args.bin_fileName = lopc_bin lopc_processor.args.netCDF_fileName = os.path.join( # noqa: PTH118 This is an arg, keep it a string self.args.base_path, - self.vehicle, + self.auv_name, MISSIONNETCDFS, mission, "lopc.nc", @@ -286,7 +286,7 @@ def calibrate(self, mission: str) -> None: cal_netcdf.args.noinput = self.args.noinput cal_netcdf.args.clobber = self.args.clobber cal_netcdf.args.noreprocess = self.args.noreprocess - cal_netcdf.args.auv_name = self.vehicle + cal_netcdf.args.auv_name = self.auv_name cal_netcdf.args.mission = mission cal_netcdf.args.plot = None cal_netcdf.calibration_dir = self.calibration_dir @@ -306,7 +306,7 @@ def align(self, mission: str = "", log_file: str = "") -> None: align_netcdf = Align_NetCDF() align_netcdf.args = argparse.Namespace() align_netcdf.args.base_path = self.args.base_path - align_netcdf.args.auv_name = self.vehicle + align_netcdf.args.auv_name = self.auv_name align_netcdf.args.mission = mission align_netcdf.args.log_file = self.args.log_file align_netcdf.args.plot = None @@ -320,7 +320,7 @@ def align(self, mission: str = "", log_file: str = "") -> None: align_netcdf.write_combined_netcdf(netcdf_dir, log_file=log_file) else: netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_combined_netcdf(netcdf_dir, vehicle=self.vehicle) + align_netcdf.write_combined_netcdf(netcdf_dir, vehicle=self.auv_name) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 error_message = f"{mission} {e}" @@ -332,7 +332,7 @@ def resample(self, mission: str = "") -> None: self.logger.info("Resampling steps for %s", mission) resamp = Resampler() resamp.args = argparse.Namespace() - resamp.args.auv_name = self.vehicle + resamp.args.auv_name = self.auv_name resamp.args.mission = mission resamp.args.log_file = self.args.log_file resamp.args.plot = None @@ -396,7 +396,7 @@ def archive( If log_file is provided, archive the processed data for LRAUV class vehicles.""" arch = Archiver(add_logger_handlers) arch.args = argparse.Namespace() - arch.args.auv_name = self.vehicle + arch.args.auv_name = self.auv_name arch.mount_dir = self.mount_dir arch.args.mission = mission arch.commandline = self.commandline @@ -441,7 +441,7 @@ def create_products(self, mission: str) -> None: cp = CreateProducts() cp.args = argparse.Namespace() cp.args.base_path = self.args.base_path - cp.args.auv_name = self.vehicle + cp.args.auv_name = self.auv_name cp.args.mission = mission cp.args.local = self.args.local cp.args.start_esecs = None @@ -459,7 +459,7 @@ def email(self, mission: str) -> None: self.logger.info("Sending notification email for %s", mission) email = Emailer() email.args = argparse.Namespace() - email.args.auv_name = self.vehicle + email.args.auv_name = self.auv_name email.args.mission = mission email.commandline = self.commandline email.args.clobber = self.args.clobber @@ -495,10 +495,10 @@ def cleanup(self, mission: str = None, log_file: str = None) -> None: ) try: shutil.rmtree( - Path(self.args.base_path, self.vehicle, MISSIONLOGS, mission), + Path(self.args.base_path, self.auv_name, MISSIONLOGS, mission), ) shutil.rmtree( - Path(self.args.base_path, self.vehicle, MISSIONNETCDFS, mission), + Path(self.args.base_path, self.auv_name, MISSIONNETCDFS, mission), ) self.logger.info("Done removing %s work files", mission) except FileNotFoundError as e: @@ -524,7 +524,7 @@ def cleanup(self, mission: str = None, log_file: str = None) -> None: def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C901, PLR0912, PLR0915 netcdfs_dir = Path( self.args.base_path, - self.vehicle, + self.auv_name, MISSIONNETCDFS, mission, ) @@ -535,7 +535,7 @@ def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C90 self.cleanup(mission) Path(netcdfs_dir).mkdir(parents=True, exist_ok=True) self.log_handler = logging.FileHandler( - Path(netcdfs_dir, f"{self.vehicle}_{mission}_{LOG_NAME}"), + Path(netcdfs_dir, f"{self.auv_name}_{mission}_{LOG_NAME}"), mode="w+", ) self.log_handler.setLevel(self._log_levels[self.args.verbose]) @@ -547,12 +547,12 @@ def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C90 self.logger.info("commandline = %s", self.commandline) try: program = "" - if self.vehicle.lower() == "dorado": + if self.auv_name.lower() == "dorado": program = dorado_info[mission]["program"] self.logger.info( 'dorado_info[mission]["comment"] = %s', dorado_info[mission]["comment"] ) - elif self.vehicle.lower() == "i2map": + elif self.auv_name.lower() == "i2map": program = "i2map" if program == TEST: error_message = ( @@ -660,10 +660,10 @@ def process_mission_exception_wrapper( if hasattr(self, "log_handler"): # If no log_handler then process_mission() failed, likely due to missing mount # Always archive the mission, especially the processing.log file - if self.vehicle == "Dorado389" and mission == "2011.256.02": + if self.auv_name == "Dorado389" and mission == "2011.256.02": self.logger.info( "Not archiving %s %s as it's likely CI testing", - self.vehicle, + self.auv_name, mission, ) if self.args.download_process: @@ -806,7 +806,7 @@ def process_log_files(self) -> None: if self.args.log_file: # log_file is string like: # brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 - self.vehicle = self.args.log_file.split("/")[0].lower() + self.auv_name = self.args.log_file.split("/")[0].lower() self.process_log_file(self.args.log_file) def process_command_line(self): @@ -1028,12 +1028,12 @@ def process_command_line(self): if __name__ == "__main__": - VEHICLE = "i2map" + AUV_NAME = "i2map" VEHICLE_DIR = "/Volumes/M3/master/i2MAP" CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" MOUNT_DIR = "smb://thalassa.shore.mbari.org/M3" # Initialize for i2MAP processing, meant to be subclassed for other vehicles - proc = Processor(VEHICLE, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) + proc = Processor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) proc.process_command_line() proc.process_missions() diff --git a/src/data/process_Dorado389.py b/src/data/process_Dorado389.py index 990494f4..eac7caf6 100755 --- a/src/data/process_Dorado389.py +++ b/src/data/process_Dorado389.py @@ -17,12 +17,12 @@ class DoradoProcessor(Processor): if __name__ == "__main__": - VEHICLE = "Dorado389" + AUV_NAME = "Dorado389" VEHICLE_DIR = "/Volumes/AUVCTD/missionlogs" CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" MOUNT_DIR = "smb://atlas.shore.mbari.org/AUVCTD" START_YEAR = 2011 - proc = DoradoProcessor(VEHICLE, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) + proc = DoradoProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) proc.process_command_line() proc.process_missions(START_YEAR) diff --git a/src/data/process_dorado.py b/src/data/process_dorado.py index aaee26db..a60c0e3b 100755 --- a/src/data/process_dorado.py +++ b/src/data/process_dorado.py @@ -30,12 +30,12 @@ class DoradoProcessor(Processor): if __name__ == "__main__": - VEHICLE = "dorado" + AUV_NAME = "dorado" VEHICLE_DIR = "/Volumes/AUVCTD/missionlogs" CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" MOUNT_DIR = "smb://atlas.shore.mbari.org/AUVCTD" START_YEAR = 2003 - proc = DoradoProcessor(VEHICLE, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) + proc = DoradoProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) proc.process_command_line() proc.process_missions(START_YEAR) diff --git a/src/data/process_i2map.py b/src/data/process_i2map.py index e2517558..c6ee2247 100755 --- a/src/data/process_i2map.py +++ b/src/data/process_i2map.py @@ -29,12 +29,12 @@ class I2mapProcessor(Processor): if __name__ == "__main__": - VEHICLE = "i2map" + AUV_NAME = "i2map" VEHICLE_DIR = "/Volumes/M3/master/i2MAP" CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" MOUNT_DIR = "smb://thalassa.shore.mbari.org/M3" START_YEAR = 2017 - proc = I2mapProcessor(VEHICLE, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) + proc = I2mapProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) proc.process_command_line() proc.process_missions(START_YEAR) diff --git a/src/data/process_lrauv.py b/src/data/process_lrauv.py index 7a99f92b..b3db134e 100755 --- a/src/data/process_lrauv.py +++ b/src/data/process_lrauv.py @@ -30,7 +30,7 @@ class LRAUVProcessor(Processor): if __name__ == "__main__": - VEHICLE = "tethys" + AUV_NAME = "tethys" LRAUV_DIR = "/Volumes/LRAUV" # It's possible that we might need calibration files for some sensors # in the future, so point to a potential directory where they can be found. @@ -38,6 +38,6 @@ class LRAUVProcessor(Processor): MOUNT_DIR = "smb://atlas.shore.mbari.org/LRAUV" START_YEAR = 2012 - proc = LRAUVProcessor(VEHICLE, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) + proc = LRAUVProcessor(AUV_NAME, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) proc.process_command_line() proc.process_log_files() From a19a35124d4ab9e960c88be6e4019d0bc641d294 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 20 Nov 2025 09:34:19 -0800 Subject: [PATCH 077/121] Factored all the options into _CONFIG_SCHEMA and use a config pattern for sharing. NGL, I used Claude/Sonnet 4 to help me do this. The pattern is mostly implemented in process.py and its subclasses. Changes made also in conftest.py to help decouple testing from routine code changes. --- src/data/conftest.py | 144 +++++++------- src/data/process.py | 351 ++++++++++++++++++++-------------- src/data/process_Dorado389.py | 8 +- src/data/process_dorado.py | 8 +- src/data/process_i2map.py | 8 +- src/data/process_lrauv.py | 9 +- 6 files changed, 303 insertions(+), 225 deletions(-) diff --git a/src/data/conftest.py b/src/data/conftest.py index fd181ce1..3486953f 100644 --- a/src/data/conftest.py +++ b/src/data/conftest.py @@ -11,6 +11,53 @@ from process import Processor from resample import FLASH_THRESHOLD, FREQ, MF_WIDTH + +def create_test_namespace(vehicle_overrides=None, processing_overrides=None): + """Create a standardized test namespace using Processor's CONFIG_SCHEMA. + + Args: + vehicle_overrides: Dict of vehicle-specific overrides (mission, auv_name, etc.) + processing_overrides: Dict of processing-specific overrides (verbose, clobber, etc.) + + Returns: + argparse.Namespace with all CONFIG_SCHEMA attributes properly set + """ + # Start with Processor's config schema defaults + config = dict(Processor._CONFIG_SCHEMA) + + # Apply common test defaults + test_defaults = { + "base_path": os.getenv("BASE_PATH", BASE_PATH), + "local": True, + "noinput": True, + "noreprocess": False, + "use_portal": False, + "freq": FREQ, + "mf_width": MF_WIDTH, + "flash_threshold": FLASH_THRESHOLD, + "clobber": False, + "no_cleanup": True, + "num_cores": 1, + "verbose": 1, + } + config.update(test_defaults) + + # Apply vehicle-specific overrides + if vehicle_overrides: + config.update(vehicle_overrides) + + # Apply processing-specific overrides + if processing_overrides: + config.update(processing_overrides) + + # Create namespace and set all attributes + ns = Namespace() + for key, value in config.items(): + setattr(ns, key, value) + + return ns + + bootstrap_mission = """The working directory on a development machine must be bootstrapped with some mission data. Process the mission used for testing with: @@ -72,40 +119,19 @@ def calibration(mission_data): @pytest.fixture(scope="session", autouse=False) def complete_dorado_processing(): """Load a short mission to have some real data to work with""" - proc = Processor(TEST_VEHICLE, TEST_VEHICLE_DIR, TEST_MOUNT_DIR, TEST_CALIBRATION_DIR) - ns = Namespace() - ns.base_path = os.getenv("BASE_PATH", BASE_PATH) - ns.auv_name = TEST_VEHICLE - ns.mission = TEST_MISSION - ns.start_year = TEST_START_YEAR - # There are several options that need to be set to run the full processing - ns.clobber = False - proc.commandline = "args set in conftest.py::complete_dorado_processing()" - ns.local = True - ns.noinput = True - ns.noreprocess = False - ns.use_portal = False - ns.freq = FREQ - ns.mf_width = MF_WIDTH - ns.flash_threshold = FLASH_THRESHOLD - # Set step flags to false to force all steps to run as the logic in - # process_mission() is not fully implemented. - ns.download_process = False - ns.calibrate = False - ns.align = False - ns.resample = False - ns.create_products = False - ns.archive = False - ns.archive_only_products = False - ns.email_to = None - ns.cleanup = False - ns.no_cleanup = True - ns.skip_download_process = False - ns.num_cores = 1 - ns.add_seconds = None - ns.log_file = None - ns.verbose = 1 - proc.args = ns + # Create namespace with vehicle-specific settings + vehicle_overrides = { + "auv_name": TEST_VEHICLE, + "mission": TEST_MISSION, + "start_year": TEST_START_YEAR, + } + + ns = create_test_namespace(vehicle_overrides=vehicle_overrides) + + # Create processor using new factory method + proc = Processor.from_args( + TEST_VEHICLE, TEST_VEHICLE_DIR, TEST_MOUNT_DIR, TEST_CALIBRATION_DIR, ns + ) proc.process_missions(TEST_START_YEAR) return proc @@ -113,45 +139,23 @@ def complete_dorado_processing(): @pytest.fixture(scope="session", autouse=False) def complete_i2map_processing(): """Load a short mission to have some real data to work with""" - proc = Processor( + # Create namespace with i2map-specific settings + vehicle_overrides = { + "auv_name": TEST_I2MAP_VEHICLE, + "mission": TEST_I2MAP_MISSION, + "start_year": TEST_I2MAP_START_YEAR, + "last_n_days": 0, # i2map-specific setting + } + + ns = create_test_namespace(vehicle_overrides=vehicle_overrides) + + # Create processor using new factory method + proc = Processor.from_args( TEST_I2MAP_VEHICLE, TEST_I2MAP_VEHICLE_DIR, TEST_I2MAP_MOUNT_DIR, TEST_I2MAP_CALIBRATION_DIR, + ns, ) - ns = Namespace() - ns.base_path = os.getenv("BASE_PATH", BASE_PATH) - ns.auv_name = TEST_I2MAP_VEHICLE - ns.mission = TEST_I2MAP_MISSION - ns.start_year = TEST_I2MAP_START_YEAR - # There are several options that need to be set to run the full processing - ns.clobber = False - proc.commandline = "args set in conftest.py::complete_i2map_processing()" - ns.local = True - ns.noinput = True - ns.noreprocess = False - ns.use_portal = False - ns.freq = FREQ - ns.mf_width = MF_WIDTH - ns.flash_threshold = FLASH_THRESHOLD - # Set step flags to false to force all steps to run as the logic in - # process_mission() is not fully implemented. - ns.download_process = False - ns.calibrate = False - ns.align = False - ns.resample = False - ns.create_products = False - ns.archive = False - ns.archive_only_products = False - ns.email_to = None - ns.cleanup = False - ns.no_cleanup = True - ns.skip_download_process = False - ns.last_n_days = 0 - ns.num_cores = 1 - ns.add_seconds = None - ns.log_file = None - ns.verbose = 1 - proc.args = ns - proc.process_missions(TEST_START_YEAR) + proc.process_missions(TEST_I2MAP_START_YEAR) return proc diff --git a/src/data/process.py b/src/data/process.py index e059137f..fde779ca 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -110,7 +110,7 @@ def wrapper(self, log_file: str): if hasattr(self, "log_handler"): # Cleanup and archiving logic self.archive(mission=None, log_file=log_file) - if not self.args.no_cleanup: + if not self.config.get("no_cleanup"): self.cleanup(log_file=log_file) self.logger.info( "log_file %s took %.1f seconds to process", log_file, time.time() - t_start @@ -132,7 +132,7 @@ class Processor: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def __init__(self, auv_name, vehicle_dir, mount_dir, calibration_dir) -> None: + def __init__(self, auv_name, vehicle_dir, mount_dir, calibration_dir, config=None) -> None: # noqa: PLR0913 # Variables to be set by subclasses, e.g.: # auv_name = "i2map" # vehicle_dir = "/Volumes/M3/master/i2MAP" @@ -141,6 +141,105 @@ def __init__(self, auv_name, vehicle_dir, mount_dir, calibration_dir) -> None: self.vehicle_dir = vehicle_dir self.mount_dir = mount_dir self.calibration_dir = calibration_dir + self.config = config or {} + + # Configuration schema with defaults - shared between from_args and common_config + _CONFIG_SCHEMA = { + # Core configuration + "base_path": BASE_PATH, + "local": False, + "noinput": False, + "clobber": False, + "noreprocess": False, + "use_portal": False, + "add_seconds": None, + "verbose": 0, + "freq": FREQ, + "mf_width": MF_WIDTH, + "flash_threshold": None, + "log_file": None, + # Processing control + "download_process": False, + "calibrate": False, + "align": False, + "resample": False, + "archive": False, + "create_products": False, + "email_to": None, + "cleanup": False, + "no_cleanup": False, + "skip_download_process": False, + "archive_only_products": False, + "num_cores": None, + # Filtering/processing params (only used in from_args, not common_config) + "start_year": None, + "end_year": None, + "start_yd": None, + "end_yd": None, + "last_n_days": None, + "mission": None, + } + + # Subset of config schema that should be passed to child processes + _CHILD_CONFIG_KEYS = { + "base_path", + "local", + "noinput", + "clobber", + "noreprocess", + "use_portal", + "add_seconds", + "verbose", + "freq", + "mf_width", + "flash_threshold", + "log_file", + "download_process", + "calibrate", + "align", + "resample", + "archive", + "create_products", + "email_to", + "cleanup", + "no_cleanup", + "skip_download_process", + "archive_only_products", + "num_cores", + } + + @property + def common_config(self): + """Get common configuration used by all child processes""" + return { + key: self.config.get(key, self._CONFIG_SCHEMA[key]) for key in self._CHILD_CONFIG_KEYS + } + + def _create_child_namespace(self, **overrides): + """Create args namespace for child processes with config overrides""" + config = {**self.common_config, **overrides} + + namespace = argparse.Namespace() + for key, value in config.items(): + setattr(namespace, key, value) + return namespace + + @classmethod + def from_args(cls, auv_name, vehicle_dir, mount_dir, calibration_dir, args): # noqa: PLR0913 + """Factory method to create Processor from argparse namespace""" + config = {} + for key, default_value in cls._CONFIG_SCHEMA.items(): + # Handle special cases for args that might not exist or have different names + if key == "add_seconds": + config[key] = getattr(args, "add_seconds", default_value) + else: + config[key] = getattr(args, key, default_value) + + instance = cls(auv_name, vehicle_dir, mount_dir, calibration_dir, config) + instance.args = args # Keep reference for compatibility + instance.commandline = " ".join(sys.argv) # Set commandline attribute + instance.logger.setLevel(instance._log_levels[args.verbose]) # Set logger level + return instance def mission_list(self, start_year: int, end_year: int) -> dict: """Return a dictionary of source directories keyed by mission name.""" @@ -156,11 +255,11 @@ def mission_list(self, start_year: int, end_year: int) -> dict: else: find_cmd = f'find {safe_vehicle_dir} -regex "{REGEX}"' self.logger.debug("Executing %s", find_cmd) - if self.args.last_n_days: + if self.config.get("last_n_days"): self.logger.info( - "Will be looking back %d days for new missions...", self.args.last_n_days + "Will be looking back %d days for new missions...", self.config["last_n_days"] ) - find_cmd += f" -mtime -{self.args.last_n_days}" + find_cmd += f" -mtime -{self.config['last_n_days']}" self.logger.info("Finding missions from %s to %s", start_year, end_year) # Can be time consuming - use to discover missions lines = subprocess.getoutput(f"{find_cmd} | sort").split("\n") # noqa: S605 @@ -188,7 +287,7 @@ def get_mission_dir(self, mission: str) -> str: self.logger.info("Is %s mounted?", self.mount_dir) sys.exit(1) if self.auv_name.lower() == "dorado" or self.auv_name == "Dorado389": - if self.args.local: + if self.config.get("local"): path = Path(self.vehicle_dir, mission) else: year = mission.split(".")[0] @@ -217,19 +316,9 @@ def get_mission_dir(self, mission: str) -> str: def download_process(self, mission: str, src_dir: str) -> None: self.logger.info("Download and processing steps for %s", mission) auv_netcdf = AUV_NetCDF() - auv_netcdf.args = argparse.Namespace() - auv_netcdf.args.base_path = self.args.base_path - auv_netcdf.args.local = self.args.local - auv_netcdf.args.noinput = self.args.noinput - auv_netcdf.args.clobber = self.args.clobber - auv_netcdf.args.noreprocess = self.args.noreprocess - auv_netcdf.args.auv_name = self.auv_name - auv_netcdf.args.mission = mission - auv_netcdf.args.use_portal = self.args.use_portal - auv_netcdf.args.add_seconds = self.args.add_seconds + auv_netcdf.args = self._create_child_namespace(auv_name=self.auv_name, mission=mission) auv_netcdf.set_portal() - auv_netcdf.args.verbose = self.args.verbose - auv_netcdf.logger.setLevel(self._log_levels[self.args.verbose]) + auv_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) auv_netcdf.logger.addHandler(self.log_handler) auv_netcdf.commandline = self.commandline auv_netcdf.download_process_logs(src_dir=src_dir) @@ -237,7 +326,7 @@ def download_process(self, mission: str, src_dir: str) -> None: # Run lopcToNetCDF.py - mimic log message from logs2netcdfs.py lopc_bin = Path( - self.args.base_path, + self.config["base_path"], self.auv_name, MISSIONLOGS, mission, @@ -251,25 +340,24 @@ def download_process(self, mission: str, src_dir: str) -> None: return self.logger.info("Processing file %s (%d bytes)", lopc_bin, file_size) lopc_processor = LOPC_Processor() - lopc_processor.args = argparse.Namespace() - lopc_processor.args.bin_fileName = lopc_bin - lopc_processor.args.netCDF_fileName = os.path.join( # noqa: PTH118 This is an arg, keep it a string - self.args.base_path, - self.auv_name, - MISSIONNETCDFS, - mission, - "lopc.nc", + lopc_processor.args = self._create_child_namespace( + bin_fileName=lopc_bin, + netCDF_fileName=os.path.join( # noqa: PTH118 This is an arg, keep it a string + self.config["base_path"], + self.auv_name, + MISSIONNETCDFS, + mission, + "lopc.nc", + ), + text_fileName="", + trans_AIcrit=0.4, + LargeCopepod_AIcrit=0.6, + LargeCopepod_ESDmin=1100.0, + LargeCopepod_ESDmax=1700.0, + debugLevel=0, + force=self.config["clobber"], ) - lopc_processor.args.text_fileName = "" - lopc_processor.args.trans_AIcrit = 0.4 - lopc_processor.args.LargeCopepod_AIcrit = 0.6 - lopc_processor.args.LargeCopepod_ESDmin = 1100.0 - lopc_processor.args.LargeCopepod_ESDmax = 1700.0 - lopc_processor.args.verbose = self.args.verbose - lopc_processor.args.debugLevel = 0 - lopc_processor.args.force = self.args.clobber - lopc_processor.args.noinput = self.args.noinput - lopc_processor.logger.setLevel(self._log_levels[self.args.verbose]) + lopc_processor.logger.setLevel(self._log_levels[self.config["verbose"]]) lopc_processor.logger.addHandler(self.log_handler) try: lopc_processor.main() @@ -280,18 +368,11 @@ def download_process(self, mission: str, src_dir: str) -> None: def calibrate(self, mission: str) -> None: self.logger.info("Calibration steps for %s", mission) cal_netcdf = Calibrate_NetCDF() - cal_netcdf.args = argparse.Namespace() - cal_netcdf.args.base_path = self.args.base_path - cal_netcdf.args.local = self.args.local - cal_netcdf.args.noinput = self.args.noinput - cal_netcdf.args.clobber = self.args.clobber - cal_netcdf.args.noreprocess = self.args.noreprocess - cal_netcdf.args.auv_name = self.auv_name - cal_netcdf.args.mission = mission - cal_netcdf.args.plot = None + cal_netcdf.args = self._create_child_namespace( + auv_name=self.auv_name, mission=mission, plot=None + ) cal_netcdf.calibration_dir = self.calibration_dir - cal_netcdf.args.verbose = self.args.verbose - cal_netcdf.logger.setLevel(self._log_levels[self.args.verbose]) + cal_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) cal_netcdf.logger.addHandler(self.log_handler) cal_netcdf.commandline = self.commandline try: @@ -304,14 +385,10 @@ def calibrate(self, mission: str) -> None: def align(self, mission: str = "", log_file: str = "") -> None: self.logger.info("Alignment steps for %s", mission) align_netcdf = Align_NetCDF() - align_netcdf.args = argparse.Namespace() - align_netcdf.args.base_path = self.args.base_path - align_netcdf.args.auv_name = self.auv_name - align_netcdf.args.mission = mission - align_netcdf.args.log_file = self.args.log_file - align_netcdf.args.plot = None - align_netcdf.args.verbose = self.args.verbose - align_netcdf.logger.setLevel(self._log_levels[self.args.verbose]) + align_netcdf.args = self._create_child_namespace( + auv_name=self.auv_name, mission=mission, plot=None + ) + align_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) align_netcdf.logger.addHandler(self.log_handler) align_netcdf.commandline = self.commandline try: @@ -331,17 +408,11 @@ def align(self, mission: str = "", log_file: str = "") -> None: def resample(self, mission: str = "") -> None: self.logger.info("Resampling steps for %s", mission) resamp = Resampler() - resamp.args = argparse.Namespace() - resamp.args.auv_name = self.auv_name - resamp.args.mission = mission - resamp.args.log_file = self.args.log_file - resamp.args.plot = None - resamp.args.freq = self.args.freq - resamp.args.mf_width = self.args.mf_width - resamp.args.flash_threshold = self.args.flash_threshold + resamp.args = self._create_child_namespace( + auv_name=self.auv_name, mission=mission, plot=None + ) resamp.commandline = self.commandline - resamp.args.verbose = self.args.verbose - resamp.logger.setLevel(self._log_levels[self.args.verbose]) + resamp.logger.setLevel(self._log_levels[self.config["verbose"]]) resamp.logger.addHandler(self.log_handler) file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" if resamp.args.log_file: @@ -349,16 +420,16 @@ def resample(self, mission: str = "") -> None: nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc") else: nc_file = Path( - self.args.base_path, + self.config["base_path"], resamp.args.auv_name, MISSIONNETCDFS, resamp.args.mission, file_name, ) - if self.args.flash_threshold and self.args.resample: + if self.config["flash_threshold"] and self.config["resample"]: self.logger.info( "Executing only resample step to produce netCDF file with flash_threshold = %s", - f"{self.args.flash_threshold:.0e}", + f"{self.config['flash_threshold']:.0e}", ) dap_file_str = os.path.join( # noqa: PTH118 AUVCTD_OPENDAP_BASE.replace("opendap/", ""), @@ -395,18 +466,10 @@ def archive( If mission is provided, archive the processed data for Dorado class vehicles. If log_file is provided, archive the processed data for LRAUV class vehicles.""" arch = Archiver(add_logger_handlers) - arch.args = argparse.Namespace() - arch.args.auv_name = self.auv_name + arch.args = self._create_child_namespace(auv_name=self.auv_name, mission=mission) arch.mount_dir = self.mount_dir - arch.args.mission = mission arch.commandline = self.commandline - arch.args.create_products = self.args.create_products - arch.args.archive_only_products = self.args.archive_only_products - arch.args.clobber = self.args.clobber - arch.args.resample = self.args.resample - arch.args.flash_threshold = self.args.flash_threshold - arch.args.verbose = self.args.verbose - arch.logger.setLevel(self._log_levels[self.args.verbose]) + arch.logger.setLevel(self._log_levels[self.config["verbose"]]) if add_logger_handlers: arch.logger.addHandler(self.log_handler) if mission: @@ -428,25 +491,21 @@ def archive( arch.args.mission, ) else: - arch.copy_to_AUVTCD(nc_file_base, self.args.freq) + arch.copy_to_AUVTCD(nc_file_base, self.config["freq"]) elif log_file: # LRAUV class vehicle archiving self.logger.info("Archiving steps for %s", log_file) - arch.copy_to_LRAUV(log_file, freq=self.args.freq) + arch.copy_to_LRAUV(log_file, freq=self.config["freq"]) else: arch.logger.error("Either mission or log_file must be provided for archiving.") arch.logger.removeHandler(self.log_handler) def create_products(self, mission: str) -> None: cp = CreateProducts() - cp.args = argparse.Namespace() - cp.args.base_path = self.args.base_path - cp.args.auv_name = self.auv_name - cp.args.mission = mission - cp.args.local = self.args.local - cp.args.start_esecs = None - cp.args.verbose = self.args.verbose - cp.logger.setLevel(self._log_levels[self.args.verbose]) + cp.args = self._create_child_namespace( + auv_name=self.auv_name, mission=mission, start_esecs=None + ) + cp.logger.setLevel(self._log_levels[self.config["verbose"]]) cp.logger.addHandler(self.log_handler) # cp.plot_biolume() @@ -458,13 +517,9 @@ def create_products(self, mission: str) -> None: def email(self, mission: str) -> None: self.logger.info("Sending notification email for %s", mission) email = Emailer() - email.args = argparse.Namespace() - email.args.auv_name = self.auv_name - email.args.mission = mission + email.args = self._create_child_namespace(auv_name=self.auv_name, mission=mission) email.commandline = self.commandline - email.args.clobber = self.args.clobber - email.args.verbose = self.args.verbose - email.logger.setLevel(self._log_levels[self.args.verbose]) + email.logger.setLevel(self._log_levels[self.config["verbose"]]) email.logger.addHandler(self.log_handler) def _remove_empty_parents(self, path: Path, stop_at: Path) -> None: @@ -495,10 +550,10 @@ def cleanup(self, mission: str = None, log_file: str = None) -> None: ) try: shutil.rmtree( - Path(self.args.base_path, self.auv_name, MISSIONLOGS, mission), + Path(self.config["base_path"], self.auv_name, MISSIONLOGS, mission), ) shutil.rmtree( - Path(self.args.base_path, self.auv_name, MISSIONNETCDFS, mission), + Path(self.config["base_path"], self.auv_name, MISSIONNETCDFS, mission), ) self.logger.info("Done removing %s work files", mission) except FileNotFoundError as e: @@ -523,13 +578,13 @@ def cleanup(self, mission: str = None, log_file: str = None) -> None: def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C901, PLR0912, PLR0915 netcdfs_dir = Path( - self.args.base_path, + self.config["base_path"], self.auv_name, MISSIONNETCDFS, mission, ) - if self.args.clobber and ( - self.args.noinput + if self.config["clobber"] and ( + self.config["noinput"] or input("Do you want to remove all work files? [y/N] ").lower() == "y" ): self.cleanup(mission) @@ -538,7 +593,7 @@ def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C90 Path(netcdfs_dir, f"{self.auv_name}_{mission}_{LOG_NAME}"), mode="w+", ) - self.log_handler.setLevel(self._log_levels[self.args.verbose]) + self.log_handler.setLevel(self._log_levels[self.config["verbose"]]) self.log_handler.setFormatter(AUV_NetCDF._formatter) self.logger.info( "=====================================================================================================================", @@ -574,30 +629,30 @@ def process_mission(self, mission: str, src_dir: str = "") -> None: # noqa: C90 except KeyError: error_message = f"{mission} not in dorado_info" raise MissingDoradoInfo(error_message) from None - if self.args.download_process: + if self.config["download_process"]: self.download_process(mission, src_dir) - elif self.args.calibrate: + elif self.config["calibrate"]: self.calibrate(mission) - elif self.args.align: + elif self.config["align"]: self.align(mission) - elif self.args.resample: + elif self.config["resample"]: self.resample(mission) - elif self.args.resample and self.args.archive: + elif self.config["resample"] and self.config["archive"]: self.resample(mission) self.archive(mission, add_logger_handlers=False) - elif self.args.create_products and self.args.archive: + elif self.config["create_products"] and self.config["archive"]: self.create_products(mission) self.archive(mission, add_logger_handlers=False) - elif self.args.create_products: + elif self.config["create_products"]: self.create_products(mission) - elif self.args.archive: + elif self.config["archive"]: self.archive(mission) - elif self.args.email_to: + elif self.config["email_to"]: self.email(mission) - elif self.args.cleanup: + elif self.config["cleanup"]: self.cleanup(mission) else: - if not self.args.skip_download_process: + if not self.config["skip_download_process"]: self.download_process(mission, src_dir) self.calibrate(mission) self.align(mission) @@ -621,12 +676,12 @@ def process_mission_job(self, mission: str, src_dir: str = "") -> None: except (TestMission, FailedMission) as e: self.logger.info(str(e)) finally: - if self.args.download_process: + if self.config["download_process"]: self.logger.info("Not archiving %s as --download_process is set", mission) else: # Still need to archive the mission, especially the processing.log file self.archive(mission) - if not self.args.no_cleanup: + if not self.config["no_cleanup"]: self.cleanup(mission) self.logger.info( "Mission %s took %.1f seconds to process", @@ -666,11 +721,11 @@ def process_mission_exception_wrapper( self.auv_name, mission, ) - if self.args.download_process: + if self.config["download_process"]: self.logger.info("Not archiving %s as --download_process is set", mission) else: self.archive(mission) - if not self.args.no_cleanup: + if not self.config["no_cleanup"]: self.cleanup(mission) self.logger.info( "Mission %s took %.1f seconds to process", @@ -679,34 +734,34 @@ def process_mission_exception_wrapper( ) self.logger.removeHandler(self.log_handler) - def process_missions(self, start_year: int) -> None: - if not self.args.start_year: - self.args.start_year = start_year - if self.args.mission: + def process_missions(self, start_year: int = None) -> None: + if not self.config.get("start_year"): + self.config["start_year"] = start_year + if self.config.get("mission"): # mission is string like: 2021.062.01 and is assumed to exist self.process_mission_exception_wrapper( - self.args.mission, - src_dir=self.get_mission_dir(self.args.mission), + self.config["mission"], + src_dir=self.get_mission_dir(self.config["mission"]), ) - elif self.args.start_year and self.args.end_year: + elif self.config.get("start_year") and self.config.get("end_year"): missions = self.mission_list( - start_year=self.args.start_year, - end_year=self.args.end_year, + start_year=self.config["start_year"], + end_year=self.config["end_year"], ) - if self.args.start_year == self.args.end_year: + if self.config["start_year"] == self.config["end_year"]: # Subselect missions by year day, has effect if --start_yd & --end_yd # are specified and --start_year & --end_year are the same missions = { mission: missions[mission] for mission in missions if ( - int(mission.split(".")[1]) >= self.args.start_yd - and int(mission.split(".")[1]) <= self.args.end_yd + int(mission.split(".")[1]) >= self.config["start_yd"] + and int(mission.split(".")[1]) <= self.config["end_yd"] ) } # https://pythonspeed.com/articles/python-multiprocessing/ - Swimming with sharks! - ncores = self.args.num_cores if self.args.num_cores else multiprocessing.cpu_count() + ncores = self.config.get("num_cores") or multiprocessing.cpu_count() missions = dict(sorted(missions.items())) if ncores > 1: self.logger.info( @@ -749,11 +804,9 @@ def process_missions(self, start_year: int) -> None: def extract(self, log_file: str) -> None: self.logger.info("Extracting log file: %s", log_file) extract = Extract() - extract.args = argparse.Namespace() - extract.args.verbose = self.args.verbose - extract.args.log_file = self.args.log_file + extract.args = self._create_child_namespace() extract.commandline = self.commandline - extract.logger.setLevel(self._log_levels[self.args.verbose]) + extract.logger.setLevel(self._log_levels[self.config["verbose"]]) extract.logger.addHandler(self.log_handler) url = os.path.join(BASE_LRAUV_WEB, log_file) # noqa: PTH118 @@ -769,12 +822,9 @@ def combine(self, log_file: str) -> None: "Adds nudge positions and more layers of quality control." ) combine = Combine_NetCDF() - combine.args = argparse.Namespace() - combine.args.plot = None - combine.args.verbose = self.args.verbose - combine.args.log_file = self.args.log_file + combine.args = self._create_child_namespace(plot=None) combine.commandline = self.commandline - combine.logger.setLevel(self._log_levels[self.args.verbose]) + combine.logger.setLevel(self._log_levels[self.config["verbose"]]) combine.logger.addHandler(self.log_handler) combine.combine_groups() @@ -787,7 +837,7 @@ def process_log_file(self, log_file: str) -> None: self.log_handler = logging.FileHandler( Path(netcdfs_dir, f"{Path(log_file).stem}_processing.log"), mode="w+" ) - self.log_handler.setLevel(self._log_levels[self.args.verbose]) + self.log_handler.setLevel(self._log_levels[self.config["verbose"]]) self.log_handler.setFormatter(AUV_NetCDF._formatter) self.logger.info( "=====================================================================================================================", @@ -803,11 +853,11 @@ def process_log_file(self, log_file: str) -> None: self.logger.info("Finished processing log file: %s", log_file) def process_log_files(self) -> None: - if self.args.log_file: + if self.config.get("log_file"): # log_file is string like: # brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 - self.auv_name = self.args.log_file.split("/")[0].lower() - self.process_log_file(self.args.log_file) + self.auv_name = self.config["log_file"].split("/")[0].lower() + self.process_log_file(self.config["log_file"]) def process_command_line(self): parser = argparse.ArgumentParser( @@ -1025,6 +1075,7 @@ def process_command_line(self): self.logger.setLevel(self._log_levels[self.args.verbose]) self.commandline = " ".join(sys.argv) + return self.args if __name__ == "__main__": @@ -1033,7 +1084,15 @@ def process_command_line(self): CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" MOUNT_DIR = "smb://thalassa.shore.mbari.org/M3" - # Initialize for i2MAP processing, meant to be subclassed for other vehicles - proc = Processor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) - proc.process_command_line() - proc.process_missions() + # Parse command line and initialize with config pattern + temp_proc = Processor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) + args = temp_proc.process_command_line() + + # Create configured processor instance + proc = Processor.from_args(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR, args) + + # Process based on arguments + if args.log_file: + proc.process_log_files() + else: + proc.process_missions(2020) diff --git a/src/data/process_Dorado389.py b/src/data/process_Dorado389.py index eac7caf6..9f3abdef 100755 --- a/src/data/process_Dorado389.py +++ b/src/data/process_Dorado389.py @@ -23,6 +23,10 @@ class DoradoProcessor(Processor): MOUNT_DIR = "smb://atlas.shore.mbari.org/AUVCTD" START_YEAR = 2011 - proc = DoradoProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) - proc.process_command_line() + # Parse command line and initialize with config pattern + temp_proc = DoradoProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) + args = temp_proc.process_command_line() + + # Create configured processor instance + proc = DoradoProcessor.from_args(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR, args) proc.process_missions(START_YEAR) diff --git a/src/data/process_dorado.py b/src/data/process_dorado.py index a60c0e3b..890ed4f8 100755 --- a/src/data/process_dorado.py +++ b/src/data/process_dorado.py @@ -36,6 +36,10 @@ class DoradoProcessor(Processor): MOUNT_DIR = "smb://atlas.shore.mbari.org/AUVCTD" START_YEAR = 2003 - proc = DoradoProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) - proc.process_command_line() + # Parse command line and initialize with config pattern + temp_proc = DoradoProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) + args = temp_proc.process_command_line() + + # Create configured processor instance + proc = DoradoProcessor.from_args(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR, args) proc.process_missions(START_YEAR) diff --git a/src/data/process_i2map.py b/src/data/process_i2map.py index c6ee2247..fe7a065d 100755 --- a/src/data/process_i2map.py +++ b/src/data/process_i2map.py @@ -35,6 +35,10 @@ class I2mapProcessor(Processor): MOUNT_DIR = "smb://thalassa.shore.mbari.org/M3" START_YEAR = 2017 - proc = I2mapProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) - proc.process_command_line() + # Parse command line and initialize with config pattern + temp_proc = I2mapProcessor(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR) + args = temp_proc.process_command_line() + + # Create configured processor instance + proc = I2mapProcessor.from_args(AUV_NAME, VEHICLE_DIR, MOUNT_DIR, CALIBRATION_DIR, args) proc.process_missions(START_YEAR) diff --git a/src/data/process_lrauv.py b/src/data/process_lrauv.py index b3db134e..20986179 100755 --- a/src/data/process_lrauv.py +++ b/src/data/process_lrauv.py @@ -36,8 +36,11 @@ class LRAUVProcessor(Processor): # in the future, so point to a potential directory where they can be found. CALIBRATION_DIR = "/Volumes/DMO/MDUC_CORE_CTD_200103/Calibration Files" MOUNT_DIR = "smb://atlas.shore.mbari.org/LRAUV" - START_YEAR = 2012 - proc = LRAUVProcessor(AUV_NAME, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) - proc.process_command_line() + # Parse command line and initialize with config pattern + temp_proc = LRAUVProcessor(AUV_NAME, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR) + args = temp_proc.process_command_line() + + # Create configured processor instance + proc = LRAUVProcessor.from_args(AUV_NAME, LRAUV_DIR, MOUNT_DIR, CALIBRATION_DIR, args) proc.process_log_files() From 50b46a9d94d30f3895725dfe6764129eaa94fa4d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 20 Nov 2025 17:48:36 -0800 Subject: [PATCH 078/121] Add common_args.py and use to reduce arguement parsing code replication. --- src/data/__init__.py | 0 src/data/align.py | 64 ++-------- src/data/archive.py | 59 ++------- src/data/calibrate.py | 58 ++------- src/data/combine.py | 40 ++---- src/data/common_args.py | 235 ++++++++++++++++++++++++++++++++++++ src/data/create_products.py | 53 ++------ src/data/emailer.py | 52 ++------ src/data/logs2netcdfs.py | 83 ++----------- src/data/nc42netcdfs.py | 37 +----- src/data/resample.py | 76 ++---------- 11 files changed, 328 insertions(+), 429 deletions(-) create mode 100644 src/data/__init__.py create mode 100644 src/data/common_args.py diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/data/align.py b/src/data/align.py index 81cf28fe..21295e27 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -12,14 +12,12 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2021, Monterey Bay Aquarium Research Institute" -import argparse -import json +import json # noqa: I001 import logging import os import re import sys import time -from argparse import RawTextHelpFormatter from datetime import UTC, datetime from pathlib import Path from socket import gethostname @@ -27,17 +25,12 @@ import git import numpy as np import pandas as pd +from scipy.interpolate import interp1d import xarray as xr -from logs2netcdfs import ( - BASE_PATH, - MISSIONNETCDFS, - SUMMARY_SOURCE, - TIME, - TIME60HZ, - AUV_NetCDF, -) + +from common_args import get_standard_lrauv_parser +from logs2netcdfs import AUV_NetCDF, MISSIONNETCDFS, SUMMARY_SOURCE, TIME, TIME60HZ from nc42netcdfs import BASE_LRAUV_PATH -from scipy.interpolate import interp1d class InvalidCalFile(Exception): @@ -679,6 +672,7 @@ def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: ) def process_command_line(self): + """Process command line arguments using shared parser infrastructure.""" examples = "Examples:" + "\n\n" examples += " Align calibrated data for some missions:\n" examples += " " + sys.argv[0] + " --mission 2020.064.10\n" @@ -691,57 +685,19 @@ def process_command_line(self): + "202509140809_202509150109.nc4\n" ) - parser = argparse.ArgumentParser( - formatter_class=RawTextHelpFormatter, + # Use shared LRAUV parser since align handles both Dorado and LRAUV + parser = get_standard_lrauv_parser( description=__doc__, epilog=examples, ) - parser.add_argument( - "--base_path", - action="store", - default=BASE_PATH, - help=f"Base directory for missionlogs and missionnetcdfs, default: {BASE_PATH}", - ) - parser.add_argument( - "--auv_name", - action="store", - default="Dorado389", - help="Dorado389 (default), i2MAP, or Multibeam", - ) - parser.add_argument( - "--mission", - action="store", - help="Mission directory, e.g.: 2020.064.10", - ) - parser.add_argument( - "--log_file", - action="store", - help=( - "Path to the log file of original LRAUV data, e.g.: " - "brizo/missionlogs/2025/20250903_20250909/" - "20250905T072042/202509050720_202509051653.nc4" - ), - ) + # Add align-specific arguments parser.add_argument( "--plot", action="store_true", help="Create intermediate plots to validate data operations.", ) - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) + self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/archive.py b/src/data/archive.py index 0593923a..69d43f31 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -9,19 +9,22 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2022, Monterey Bay Aquarium Research Institute" -import argparse -import logging +import logging # noqa: I001 import os import shutil import sys import time from pathlib import Path +from common_args import DEFAULT_BASE_PATH, get_standard_dorado_parser from create_products import MISSIONIMAGES, MISSIONODVS -from logs2netcdfs import BASE_PATH, LOG_FILES, MISSIONNETCDFS, AUV_NetCDF +from logs2netcdfs import AUV_NetCDF, LOG_FILES, MISSIONNETCDFS from nc42netcdfs import BASE_LRAUV_PATH, GROUP from resample import FREQ +# Define BASE_PATH for backward compatibility +BASE_PATH = DEFAULT_BASE_PATH + LOG_NAME = "processing.log" AUVCTD_VOL = "/Volumes/AUVCTD" LRAUV_VOL = "/Volumes/LRAUV" @@ -226,33 +229,13 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, ) def process_command_line(self): - parser = argparse.ArgumentParser( - formatter_class=argparse.RawTextHelpFormatter, + """Process command line arguments using shared parser infrastructure.""" + # Use shared parser with archive-specific additions + parser = get_standard_dorado_parser( description=__doc__, ) - parser.add_argument( - "--base_path", - action="store", - default=BASE_PATH, - help="Base directory for missionlogs and missionnetcdfs, default: auv_data", - ) - parser.add_argument( - "--auv_name", - action="store", - default="Dorado389", - help="Dorado389 (default), i2map, or Multibeam", - ) - parser.add_argument( - "--mission", - action="store", - help="Mission directory, e.g.: 2020.064.10", - ) - parser.add_argument( - "--freq", - action="store", - default=FREQ, - help="Resample freq", - ) + + # Add archive-specific arguments parser.add_argument( "--M3", action="store_true", @@ -263,11 +246,6 @@ def process_command_line(self): action="store_true", help="Copy reampled netCDF file(s) to appropriate place on AUVCTD", ) - parser.add_argument( - "--clobber", - action="store_true", - help="Remove existing netCDF files before copying to the AUVCTD directory", - ) parser.add_argument( "--archive_only_products", action="store_true", @@ -278,20 +256,7 @@ def process_command_line(self): action="store_true", help="Create products from the resampled netCDF file(s)", ) - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) + self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index c9e735f0..f06c1b69 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -27,15 +27,13 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2020, Monterey Bay Aquarium Research Institute" -import argparse # noqa: I001 -import logging +import logging # noqa: I001 import os import shlex import shutil import subprocess import sys import time -from argparse import RawTextHelpFormatter from collections import OrderedDict from datetime import UTC, datetime from pathlib import Path @@ -46,16 +44,17 @@ import defusedxml.ElementTree as ET # noqa: N817 import matplotlib.pyplot as plt import numpy as np +import pandas as pd +import pyproj import xarray as xr +from scipy import signal from scipy.interpolate import interp1d -from seawater import eos80 -import pandas as pd -import pyproj from AUV import monotonic_increasing_time_indices, nudge_positions +from common_args import get_standard_dorado_parser from hs2_proc import compute_backscatter, hs2_calc_bb, hs2_read_cal_file -from logs2netcdfs import BASE_PATH, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ, AUV_NetCDF -from scipy import signal +from logs2netcdfs import AUV_NetCDF, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ +from seawater import eos80 AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay @@ -3282,39 +3281,19 @@ def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = Tr return netcdfs_dir def process_command_line(self): + """Process command line arguments using shared parser infrastructure.""" examples = "Examples:" + "\n\n" examples += " Calibrate original data for some missions:\n" examples += " " + sys.argv[0] + " --mission 2020.064.10\n" examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n" - parser = argparse.ArgumentParser( - formatter_class=RawTextHelpFormatter, + # Use shared parser with calibrate-specific additions + parser = get_standard_dorado_parser( description=__doc__, epilog=examples, ) - parser.add_argument( - "--base_path", - action="store", - default=BASE_PATH, - help=f"Base directory for missionlogs and missionnetcdfs, default: {BASE_PATH}", - ) - parser.add_argument( - "--auv_name", - action="store", - default="Dorado389", - help="Dorado389 (default), i2MAP, or Multibeam", - ) - parser.add_argument( - "--mission", - action="store", - help="Mission directory, e.g.: 2020.064.10", - ) - parser.add_argument( - "--noinput", - action="store_true", - help="Execute without asking for a response, e.g. to not ask to re-download file", - ) + # Add calibrate-specific arguments parser.add_argument( "--plot", action="store", @@ -3322,24 +3301,9 @@ def process_command_line(self): " to validate data operations. Use first to plot " " points, e.g. first2000. Program blocks upon show.", ) - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) - self.commandline = " ".join(sys.argv) diff --git a/src/data/combine.py b/src/data/combine.py index a1f40b70..dc05ecec 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -38,22 +38,21 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" -import argparse # noqa: I001 -import json +import json # noqa: I001 import logging import sys import time -from argparse import RawTextHelpFormatter from datetime import UTC from pathlib import Path from socket import gethostname from typing import NamedTuple + import cf_xarray # Needed for the .cf accessor # noqa: F401 import numpy as np -import xarray as xr - import pandas as pd +import xarray as xr from AUV import monotonic_increasing_time_indices, nudge_positions +from common_args import get_standard_lrauv_parser from logs2netcdfs import AUV_NetCDF, TIME, TIME60HZ from nc42netcdfs import BASE_LRAUV_PATH, GROUP @@ -717,6 +716,7 @@ def write_netcdf(self) -> None: return netcdfs_dir def process_command_line(self): + """Process command line arguments using shared parser infrastructure.""" examples = "Examples:" + "\n\n" examples += " Combine original data from Group files for an LRAUV log file:\n" examples += ( @@ -727,43 +727,21 @@ def process_command_line(self): + "202509140809_202509150109.nc4\n" ) - parser = argparse.ArgumentParser( - formatter_class=RawTextHelpFormatter, + # Use shared parser with combine-specific additions + parser = get_standard_lrauv_parser( description=__doc__, epilog=examples, ) - parser.add_argument( - "--log_file", - action="store", - help=( - "Path to the log file of original LRAUV data, e.g.: " - "brizo/missionlogs/2025/20250903_20250909/" - "20250905T072042/202509050720_202509051653.nc4" - ), - ) + + # Add combine-specific arguments parser.add_argument( "--plot", action="store_true", help="Create intermediate plot(s) to help validate processing", ) - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) - self.commandline = " ".join(sys.argv) diff --git a/src/data/common_args.py b/src/data/common_args.py new file mode 100644 index 00000000..e79b6198 --- /dev/null +++ b/src/data/common_args.py @@ -0,0 +1,235 @@ +""" +Shared argument parser infrastructure for AUV data processing modules. + +Provides common argument parsers to eliminate duplication across modules +and ensure consistent command-line interfaces. +""" + +import argparse +from pathlib import Path + +# Define constants locally to avoid circular imports +DEFAULT_BASE_PATH = Path(__file__).parent.joinpath("../../data/auv_data").resolve() +DEFAULT_FREQ = "1S" # 1 Hz resampling frequency +DEFAULT_MF_WIDTH = 3 # Median filter width + + +class CommonArgumentParser: + """Shared argument parser factory for all AUV processing modules.""" + + @staticmethod + def get_core_parser(): + """Get parser with core arguments used across all modules. + + Returns: + argparse.ArgumentParser: Parser configured with add_help=False for parent use + """ + parser = argparse.ArgumentParser(add_help=False) + + # Core processing arguments - used by almost all modules + parser.add_argument( + "--base_path", + action="store", + default=DEFAULT_BASE_PATH, + help=f"Base directory for missionlogs and missionnetcdfs, default: {DEFAULT_BASE_PATH}", + ) + parser.add_argument( + "--auv_name", + action="store", + default="Dorado389", + help="AUV name: Dorado389 (default), i2map, or multibeam", + ) + parser.add_argument( + "--mission", + action="store", + help="Mission directory, e.g.: 2020.064.10", + ) + parser.add_argument( + "--noinput", + action="store_true", + help="Execute without asking for responses, e.g. to not ask to re-download file", + ) + parser.add_argument( + "--verbose", + type=int, + choices=range(3), + default=0, + help="Verbosity level: 0=WARN (default), 1=INFO, 2=DEBUG", + ) + + return parser + + @staticmethod + def get_processing_parser(): + """Get parser with common processing control arguments. + + Returns: + argparse.ArgumentParser: Parser configured with add_help=False for parent use + """ + parser = argparse.ArgumentParser(add_help=False) + + # Processing control arguments + parser.add_argument( + "--local", + action="store_true", + help="Specify if files are local in the MISSION directory", + ) + parser.add_argument( + "--clobber", + action="store_true", + help="Overwrite existing output files", + ) + parser.add_argument( + "--noreprocess", + action="store_true", + help="Don't re-process existing output files", + ) + + return parser + + @staticmethod + def get_dorado_parser(): + """Get parser with Dorado-specific arguments. + + Returns: + argparse.ArgumentParser: Parser configured with add_help=False for parent use + """ + parser = argparse.ArgumentParser(add_help=False) + + # Dorado-specific arguments + parser.add_argument( + "--add_seconds", + type=int, + help="Add seconds for GPS Week Rollover Bug", + ) + parser.add_argument( + "--use_portal", + action="store_true", + help="Download via portal instead of mount", + ) + parser.add_argument( + "--freq", + type=str, + default=DEFAULT_FREQ, + help=f"Resampling frequency in Hz, default: {DEFAULT_FREQ}", + ) + parser.add_argument( + "--mf_width", + type=int, + default=DEFAULT_MF_WIDTH, + help=f"Median filter width for smoothing, default: {DEFAULT_MF_WIDTH}", + ) + + return parser + + @staticmethod + def get_lrauv_parser(): + """Get parser with LRAUV-specific arguments. + + Returns: + argparse.ArgumentParser: Parser configured with add_help=False for parent use + """ + parser = argparse.ArgumentParser(add_help=False) + + # LRAUV-specific arguments + parser.add_argument( + "--log_file", + action="store", + help=( + "Path to the log file of original LRAUV data, e.g.: " + "brizo/missionlogs/2025/20250903_20250909/" + "20250905T072042/202509050720_202509051653.nc4" + ), + ) + + return parser + + @staticmethod + def get_time_range_parser(): + """Get parser with time range filtering arguments. + + Returns: + argparse.ArgumentParser: Parser configured with add_help=False for parent use + """ + parser = argparse.ArgumentParser(add_help=False) + + # Time range filtering arguments + parser.add_argument( + "--start_year", + type=int, + help="Start year for mission filtering", + ) + parser.add_argument( + "--end_year", + type=int, + help="End year for mission filtering", + ) + parser.add_argument( + "--start_yd", + type=int, + help="Start year day for mission filtering", + ) + parser.add_argument( + "--end_yd", + type=int, + help="End year day for mission filtering", + ) + parser.add_argument( + "--last_n_days", + type=int, + help="Process only the last N days of data", + ) + + return parser + + @classmethod + def create_parser(cls, module_name, parents=None, **kwargs): + """Create a parser with standard formatting and common parents. + + Args: + module_name: Name of the module (for help text) + parents: List of parent parsers to include + **kwargs: Additional arguments for ArgumentParser + + Returns: + argparse.ArgumentParser: Configured parser + """ + default_kwargs = { + "formatter_class": argparse.RawTextHelpFormatter, + "parents": parents or [], + } + default_kwargs.update(kwargs) + + return argparse.ArgumentParser(**default_kwargs) + + +# Convenience functions for common parser combinations +def get_standard_dorado_parser(**kwargs): + """Get parser with standard Dorado arguments (core + processing + dorado).""" + parents = [ + CommonArgumentParser.get_core_parser(), + CommonArgumentParser.get_processing_parser(), + CommonArgumentParser.get_dorado_parser(), + ] + return CommonArgumentParser.create_parser("dorado", parents=parents, **kwargs) + + +def get_standard_lrauv_parser(**kwargs): + """Get parser with standard LRAUV arguments (core + processing + lrauv).""" + parents = [ + CommonArgumentParser.get_core_parser(), + CommonArgumentParser.get_processing_parser(), + CommonArgumentParser.get_lrauv_parser(), + ] + return CommonArgumentParser.create_parser("lrauv", parents=parents, **kwargs) + + +def get_mission_processing_parser(**kwargs): + """Get parser with mission processing arguments (includes time range).""" + parents = [ + CommonArgumentParser.get_core_parser(), + CommonArgumentParser.get_processing_parser(), + CommonArgumentParser.get_dorado_parser(), + CommonArgumentParser.get_time_range_parser(), + ] + return CommonArgumentParser.create_parser("mission_processing", parents=parents, **kwargs) diff --git a/src/data/create_products.py b/src/data/create_products.py index aa5343a5..54dbdece 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -7,7 +7,7 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2023, Monterey Bay Aquarium Research Institute" -import argparse +import argparse # noqa: I001 import contextlib import logging import os @@ -22,11 +22,16 @@ import numpy as np import pyproj import xarray as xr + +from common_args import DEFAULT_BASE_PATH, get_standard_dorado_parser from gulper import Gulper -from logs2netcdfs import BASE_PATH, MISSIONNETCDFS, AUV_NetCDF +from logs2netcdfs import AUV_NetCDF, MISSIONNETCDFS from resample import AUVCTD_OPENDAP_BASE, FREQ from scipy.interpolate import griddata +# Define BASE_PATH for backward compatibility +BASE_PATH = DEFAULT_BASE_PATH + MISSIONODVS = "missionodvs" MISSIONIMAGES = "missionimages" @@ -524,51 +529,19 @@ def gulper_odv(self, sec_bnds: int = 1) -> str: # noqa: C901, PLR0912, PLR0915 ) def process_command_line(self): - parser = argparse.ArgumentParser( - formatter_class=argparse.RawTextHelpFormatter, + """Process command line arguments using shared parser infrastructure.""" + # Use shared parser with create_products-specific additions + parser = get_standard_dorado_parser( description=__doc__, ) - ( - parser.add_argument( - "--base_path", - action="store", - default=BASE_PATH, - help=f"Base directory for missionlogs and missionnetcdfs, default: {BASE_PATH}", - ), - ) - parser.add_argument( - "--auv_name", - action="store", - default="dorado", - help="dorado (default), i2map", - ) - ( - parser.add_argument( - "--mission", - action="store", - help="Mission directory, e.g.: 2020.064.10", - ), - ) + + # Add create_products-specific arguments parser.add_argument( "--start_esecs", help="Start time of mission in epoch seconds, optional for gulper time lookup", type=float, ) - parser.add_argument("--local", help="Read local files", action="store_true") - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) + self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/emailer.py b/src/data/emailer.py index 1459760b..4ff7b571 100755 --- a/src/data/emailer.py +++ b/src/data/emailer.py @@ -7,14 +7,17 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2023, Monterey Bay Aquarium Research Institute" -import argparse -import logging +import logging # noqa: I001 import platform import sys import time from pathlib import Path -from logs2netcdfs import BASE_PATH, MISSIONNETCDFS, AUV_NetCDF +from common_args import DEFAULT_BASE_PATH, get_standard_dorado_parser +from logs2netcdfs import AUV_NetCDF, MISSIONNETCDFS + +# Define BASE_PATH for backward compatibility +BASE_PATH = DEFAULT_BASE_PATH NOTIFICATION_EMAIL = "auvctd@listserver.mbari.org" TEMPLATE = """ @@ -90,31 +93,13 @@ def compose_message(self) -> str: ) def process_command_line(self): - parser = argparse.ArgumentParser( - formatter_class=argparse.RawTextHelpFormatter, + """Process command line arguments using shared parser infrastructure.""" + # Use shared parser with emailer-specific additions + parser = get_standard_dorado_parser( description=__doc__, ) - ( - parser.add_argument( - "--base_path", - action="store", - default=BASE_PATH, - help="Base directory for missionlogs and missionnetcdfs, default: auv_data", - ), - ) - parser.add_argument( - "--auv_name", - action="store", - default="Dorado389", - help="Dorado389 (default), i2map, or Multibeam", - ) - ( - parser.add_argument( - "--mission", - action="store", - help="Mission directory, e.g.: 2020.064.10", - ), - ) + + # Add emailer-specific arguments parser.add_argument( "--email_to", action="store", @@ -124,20 +109,7 @@ def process_command_line(self): f"default: {NOTIFICATION_EMAIL}" ), ) - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) + self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/logs2netcdfs.py b/src/data/logs2netcdfs.py index 444ca816..c931bcb8 100755 --- a/src/data/logs2netcdfs.py +++ b/src/data/logs2netcdfs.py @@ -9,7 +9,7 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2020, Monterey Bay Aquarium Research Institute" -import argparse +import argparse # noqa: I001 import asyncio import concurrent import logging @@ -27,8 +27,10 @@ import requests from aiohttp import ClientSession from aiohttp.client_exceptions import ClientConnectorError -from AUV import monotonic_increasing_time_indices from netCDF4 import Dataset + +from AUV import monotonic_increasing_time_indices +from common_args import get_standard_dorado_parser from readauvlog import log_record LOG_FILES = ( @@ -883,42 +885,19 @@ def set_portal(self) -> None: self.deployments_url = Path(self.args.portal, "deployments") def process_command_line(self): + """Process command line arguments using shared parser infrastructure.""" examples = "Examples:" + "\n\n" examples += " Write to local missionnetcdfs direcory:\n" examples += " " + sys.argv[0] + " --mission 2020.064.10\n" examples += " " + sys.argv[0] + " --auv_name i2map --mission 2020.055.01\n" - parser = argparse.ArgumentParser( - formatter_class=argparse.RawTextHelpFormatter, + # Use shared parser with logs2netcdfs-specific additions + parser = get_standard_dorado_parser( description=__doc__, epilog=examples, ) - parser.add_argument( - "--base_path", - action="store", - default=BASE_PATH, - help="Base directory for missionlogs and missionnetcdfs, default: auv_data", - ) - parser.add_argument( - "--auv_name", - action="store", - help=( - "Dorado389, i2map, or multibeam. Will be saved in " - "directory with this name no matter its portal entry" - ), - ) - parser.add_argument( - "--mission", - action="store", - help="Mission directory, e.g.: 2020.064.10", - ) - parser.add_argument( - "--local", - action="store_true", - help="Specify if files are local in the MISSION directory", - ) - + # Add logs2netcdfs-specific arguments parser.add_argument( "--title", action="store", @@ -929,22 +908,6 @@ def process_command_line(self): action="store", help="Additional information about the dataset", ) - - parser.add_argument( - "--noinput", - action="store_true", - help="Execute without asking for a response, e.g. to not ask to re-download file", - ) - parser.add_argument( - "--clobber", - action="store_true", - help="Use with --noinput to overwrite existing downloaded log files", - ) - parser.add_argument( - "--noreprocess", - action="store_true", - help="Use with --noinput to not re-process existing downloaded log files", - ) parser.add_argument( "--start", action="store", @@ -972,41 +935,11 @@ def process_command_line(self): " service, e.g.:" " http://stoqs.mbari.org:8080/auvdata/v1", ) - parser.add_argument( - "--use_portal", - action="store_true", - help=( - "Download data using portal (much faster than copy over" - " remote connection), otherwise copy from mount point" - ), - ) parser.add_argument( "--vehicle_dir", action="store", help="Directory for the vehicle's mission logs, e.g.: /Volumes/AUVCTD/missionlogs", ) - parser.add_argument( - # To use for mission 2025.316.02 which suffered from the GPS week rollover bug: - # 1024 * 7 * 24 * 3600 = 619315200 seconds to add to timeTag variables in the log_data - "--add_seconds", - type=int, - default=0, - help="Seconds to add to timeTag in log data", - ) - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index f4f5f51d..da65f9ec 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -8,7 +8,6 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2025, Monterey Bay Aquarium Research Institute" -import argparse import logging import os import sys @@ -20,6 +19,7 @@ import netCDF4 import numpy as np import pooch +from common_args import get_standard_lrauv_parser # Conditional imports for plotting (only when needed) try: @@ -1032,6 +1032,7 @@ def global_metadata(self, log_file: str, group_name: str): return metadata def process_command_line(self): + """Process command line arguments using shared parser infrastructure.""" examples = "Examples:" + "\n\n" examples += " Write to local missionnetcdfs direcory:\n" examples += " " + sys.argv[0] + " --mission 2020.064.10\n" @@ -1044,11 +1045,13 @@ def process_command_line(self): + "202509140809_202509150109.nc4 --plot_time /latitude_time\n" ) - parser = argparse.ArgumentParser( - formatter_class=argparse.RawTextHelpFormatter, + # Use shared parser with nc42netcdfs-specific additions + parser = get_standard_lrauv_parser( description=__doc__, epilog=examples, ) + + # Add nc42netcdfs-specific arguments parser.add_argument( "--filter_monotonic_time", action="store_true", @@ -1071,20 +1074,6 @@ def process_command_line(self): action="store", help="Convert a range of missions wth end time in YYYYMMDD format", ) - parser.add_argument( - "--auv_name", - action="store", - help="Name of the AUV and the directory name for its data, e.g.: tethys, ahi, pontus", - ) - parser.add_argument( - "--log_file", - action="store", - help=( - "Path to the log file for the mission, e.g.: " - "brizo/missionlogs/2025/20250903_20250909/" - "20250905T072042/202509050720_202509051653.nc4" - ), - ) parser.add_argument( "--known_hash", action="store", @@ -1110,20 +1099,6 @@ def process_command_line(self): "Format for is /Group/variable_name." ), ) - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) diff --git a/src/data/resample.py b/src/data/resample.py index bf1b9623..0fb6a7a0 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -9,8 +9,7 @@ __author__ = "Mike McCann" __copyright__ = "Copyright 2021, Monterey Bay Aquarium Research Institute" -import argparse -import logging +import logging # noqa: I001 import re import sys import time @@ -25,12 +24,14 @@ import numpy as np import pandas as pd import xarray as xr -from dorado_info import dorado_info -from logs2netcdfs import BASE_PATH, MISSIONNETCDFS, SUMMARY_SOURCE, TIME, AUV_NetCDF -from nc42netcdfs import BASE_LRAUV_PATH from pysolar.solar import get_altitude from scipy import signal +from common_args import get_standard_lrauv_parser +from dorado_info import dorado_info +from logs2netcdfs import AUV_NetCDF, BASE_PATH, MISSIONNETCDFS, SUMMARY_SOURCE, TIME +from nc42netcdfs import BASE_LRAUV_PATH + MF_WIDTH = 3 FREQ = "1S" PLOT_SECONDS = 300 @@ -1320,40 +1321,13 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 self.logger.info("Saved resampled mission to %s", out_fn) def process_command_line(self): - parser = argparse.ArgumentParser( - formatter_class=argparse.RawTextHelpFormatter, + """Process command line arguments using shared parser infrastructure.""" + # Use shared parser with resample-specific additions + parser = get_standard_lrauv_parser( description=__doc__, ) - ( - parser.add_argument( - "--base_path", - action="store", - default=BASE_PATH, - help="Base directory for missionlogs and missionnetcdfs, default: auv_data", - ), - ) - parser.add_argument( - "--auv_name", - action="store", - default="Dorado389", - help="Dorado389 (default), i2MAP, or Multibeam", - ) - ( - parser.add_argument( - "--mission", - action="store", - help="Mission directory, e.g.: 2020.064.10", - ), - ) - parser.add_argument( - "--log_file", - action="store", - help=( - "Path to the log file of original LRAUV data, e.g.: " - "brizo/missionlogs/2025/20250903_20250909/" - "20250905T072042/202509050720_202509051653.nc4" - ), - ) + + # Add resample-specific arguments parser.add_argument("--plot", action="store_true", help="Plot data") parser.add_argument( "--plot_seconds", @@ -1362,19 +1336,6 @@ def process_command_line(self): type=float, help="Plot seconds of data", ) - parser.add_argument( - "--mf_width", - action="store", - default=MF_WIDTH, - type=int, - help="Median filter width", - ) - parser.add_argument( - "--freq", - action="store", - default=FREQ, - help="Resample freq", - ) parser.add_argument( "--flash_threshold", action="store", @@ -1384,20 +1345,7 @@ def process_command_line(self): "and append to output filename" ), ) - parser.add_argument( - "-v", - "--verbose", - type=int, - choices=range(3), - action="store", - default=0, - const=1, - nargs="?", - help="verbosity level: " - + ", ".join( - [f"{i}: {v}" for i, v in enumerate(("WARN", "INFO", "DEBUG"))], - ), - ) + self.args = parser.parse_args() self.logger.setLevel(self._log_levels[self.args.verbose]) self.commandline = " ".join(sys.argv) From 83078408b8806b62ab674a831b5a5d9417ee0ede Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 20 Nov 2025 17:56:57 -0800 Subject: [PATCH 079/121] Fix linting with exceptions for non-pep8 module names. --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f6684c65..cb55b1da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,3 +110,10 @@ ignore = [ [tool.ruff.lint.per-file-ignores] "src/data/dorado_info.py" = ["E501"] +# Legacy module names that don't follow PEP 8 naming convention +"src/data/AUV.py" = ["N999"] +"src/data/BLFilter.py" = ["N999"] +"src/data/lopcMEP.py" = ["N999"] +"src/data/lopcToNetCDF.py" = ["N999"] +"src/data/process_Dorado389.py" = ["N999"] +"src/data/usblToNetCDF.py" = ["N999"] From 9ff09a4dac0287b6f36df4d8622f98892af67c12 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 20 Nov 2025 18:04:28 -0800 Subject: [PATCH 080/121] Fix pytest: Add the current directory to Python path so modules can import each other. --- src/data/conftest.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/data/conftest.py b/src/data/conftest.py index 3486953f..054dba6a 100644 --- a/src/data/conftest.py +++ b/src/data/conftest.py @@ -1,10 +1,16 @@ # noqa: INP001 import logging import os +import sys from argparse import Namespace from pathlib import Path import pytest + +# Add the current directory to Python path so modules can import each other +# This preserves the original import behavior while allowing package structure +sys.path.insert(0, str(Path(__file__).parent)) + from calibrate import Calibrate_NetCDF from hs2_proc import hs2_read_cal_file from logs2netcdfs import BASE_PATH, MISSIONLOGS From 15d93bb8d08aa4ec958866df275e1efe1a2e4476 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 20 Nov 2025 18:11:24 -0800 Subject: [PATCH 081/121] Update values for CI to pass in Actions and act. --- src/data/test_process_dorado.py | 6 +++--- src/data/test_process_i2map.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index d368b183..423feab3 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -32,7 +32,7 @@ def test_process_dorado(complete_dorado_processing): # If code changes are expected to change the file size then we should # update the expected size here. EXPECTED_SIZE_GITHUB = 621404 - EXPECTED_SIZE_ACT = 621298 + EXPECTED_SIZE_ACT = 621402 EXPECTED_SIZE_LOCAL = 621452 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata @@ -50,8 +50,8 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "3bab0300e575c1d752a35f49e49e340e" - EXPECTED_MD5_ACT = "bdb9473e5dedb694618f518b8cf0ca1e" + EXPECTED_MD5_GITHUB = "631c25971f0e3b4f83f981389a179917" + EXPECTED_MD5_ACT = "bb1d539284bee531a00c4d4d99580bf0" EXPECTED_MD5_LOCAL = "9137be5a2ed840cfca94a723285355ec" if str(proc.args.base_path).startswith("/home/runner"): # The MD5 hash is different in GitHub Actions, maybe due to different metadata diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index 66508695..e7a9b553 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -31,7 +31,7 @@ def test_process_i2map(complete_i2map_processing): # If code changes are expected to change the file size then we should # update the expected size here. EXPECTED_SIZE_GITHUB = 58942 - EXPECTED_SIZE_ACT = 58816 + EXPECTED_SIZE_ACT = 58912 EXPECTED_SIZE_LOCAL = 59042 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata From 8b22fe2bbf1a5a10fc901b6b851ee4df4ebf28a9 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Thu, 20 Nov 2025 18:13:28 -0800 Subject: [PATCH 082/121] One more update in value for Actions. --- src/data/test_process_dorado.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/test_process_dorado.py b/src/data/test_process_dorado.py index 423feab3..1f00d2c5 100644 --- a/src/data/test_process_dorado.py +++ b/src/data/test_process_dorado.py @@ -50,7 +50,7 @@ def test_process_dorado(complete_dorado_processing): check_md5 = True if check_md5: # Check that the MD5 hash has not changed - EXPECTED_MD5_GITHUB = "631c25971f0e3b4f83f981389a179917" + EXPECTED_MD5_GITHUB = "3bab0300e575c1d752a35f49e49e340e" EXPECTED_MD5_ACT = "bb1d539284bee531a00c4d4d99580bf0" EXPECTED_MD5_LOCAL = "9137be5a2ed840cfca94a723285355ec" if str(proc.args.base_path).startswith("/home/runner"): From 3b3b7747bbeec307e9bddd5e71fb7a0464f349cc Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:07:34 -0800 Subject: [PATCH 083/121] Add --start and --end options for lrauv log file processing. --- src/data/process.py | 168 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 166 insertions(+), 2 deletions(-) diff --git a/src/data/process.py b/src/data/process.py index fde779ca..369ca597 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -178,6 +178,9 @@ def __init__(self, auv_name, vehicle_dir, mount_dir, calibration_dir, config=Non "end_yd": None, "last_n_days": None, "mission": None, + "start": None, # LRAUV datetime filtering + "end": None, # LRAUV datetime filtering + "auv_name": None, # LRAUV AUV name filtering } # Subset of config schema that should be passed to child processes @@ -280,6 +283,125 @@ def mission_list(self, start_year: int, end_year: int) -> dict: self.logger.warning("Cannot parse year from %s", mission) return missions + def _parse_datetime_string(self, datetime_str: str) -> datetime | None: + """Parse datetime string in YYYYMMDDTHHMMSS format.""" + try: + return datetime.strptime(datetime_str, "%Y%m%dT%H%M%S").replace(tzinfo=UTC) + except ValueError: + return None + + def _normalize_datetime_dir(self, dir_datetime_str: str) -> str: + """Normalize datetime directory name to YYYYMMDDTHHMMSS format.""" + if "T" not in dir_datetime_str: + return "" + + PARTIAL_DATETIME_LEN = 13 # YYYYMMDDTHHNN format + SHORT_DATETIME_LEN = 11 # YYYYMMDDTHH format + + if len(dir_datetime_str) == PARTIAL_DATETIME_LEN: + return dir_datetime_str + "00" # Add seconds + if len(dir_datetime_str) == SHORT_DATETIME_LEN: + return dir_datetime_str + "0000" # Add minutes and seconds + return dir_datetime_str + + def _find_log_files_in_datetime_dir( + self, datetime_dir: Path, start_dt: datetime, end_dt: datetime + ) -> list: + """Find log files in a datetime directory if it's in range.""" + log_files = [] + + # Normalize and parse directory datetime + normalized_str = self._normalize_datetime_dir(datetime_dir.name) + if not normalized_str: + return log_files + + dir_dt = self._parse_datetime_string(normalized_str) + if not dir_dt: + return log_files + + # Check if directory datetime is in range + if start_dt <= dir_dt <= end_dt: + # Look for main log file (*.nc4 file) + nc4_files = list(datetime_dir.glob("*.nc4")) + if nc4_files: + relative_path = str(nc4_files[0].relative_to(Path(self.vehicle_dir))) + log_files.append(relative_path) + self.logger.debug("Found log file: %s", relative_path) + + return log_files + + def _should_process_auv_dir(self, auv_dir: Path, auv_name: str) -> bool: + """Check if an AUV directory should be processed based on auv_name filter.""" + if auv_name and auv_dir.name.lower() != auv_name.lower(): + return False + + missionlogs_dir = auv_dir / "missionlogs" + return missionlogs_dir.exists() + + def log_file_list(self, start_datetime: str, end_datetime: str, auv_name: str = None) -> list: + """Return a list of LRAUV log files within the specified datetime range. + + Args: + start_datetime: Start datetime in YYYYMMDDTHHMMSS format + end_datetime: End datetime in YYYYMMDDTHHMMSS format + auv_name: Optional AUV name to filter results (e.g., 'brizo', 'ahi') + + Returns: + List of log file paths relative to base_path + """ + log_files = [] + vehicle_dir = Path(self.vehicle_dir).resolve() + + # Parse datetime strings + start_dt = self._parse_datetime_string(start_datetime) + end_dt = self._parse_datetime_string(end_datetime) + + if not start_dt or not end_dt: + self.logger.exception("Invalid datetime format. Use YYYYMMDDTHHMMSS") + return log_files + + if auv_name: + self.logger.info( + "Finding log files from %s to %s for AUV: %s", + start_datetime, + end_datetime, + auv_name, + ) + else: + self.logger.info( + "Finding log files from %s to %s for all AUVs", + start_datetime, + end_datetime, + ) + + # Search through each AUV directory + for auv_dir in vehicle_dir.glob("*/"): + if not self._should_process_auv_dir(auv_dir, auv_name): + continue + + missionlogs_dir = auv_dir / "missionlogs" + + # Search through years + for year_dir in sorted(missionlogs_dir.glob("*/")): + try: + year = int(year_dir.name) + # Skip if year is clearly outside our range + if year < start_dt.year or year > end_dt.year: + continue + except ValueError: + continue + + # Search through date range directories and datetime directories + for date_range_dir in year_dir.glob("*/"): + for datetime_dir in date_range_dir.glob("*/"): + files_found = self._find_log_files_in_datetime_dir( + datetime_dir, start_dt, end_dt + ) + log_files.extend(files_found) + + self.logger.info("Found %d log files in date range", len(log_files)) + return log_files + def get_mission_dir(self, mission: str) -> str: """Return the mission directory.""" if not Path(self.vehicle_dir).exists(): @@ -827,8 +949,8 @@ def combine(self, log_file: str) -> None: combine.logger.setLevel(self._log_levels[self.config["verbose"]]) combine.logger.addHandler(self.log_handler) - combine.combine_groups() - combine.write_netcdf() + combine.combine_groups(log_file=log_file) + combine.write_netcdf(log_file=log_file) @log_file_processor def process_log_file(self, log_file: str) -> None: @@ -858,6 +980,28 @@ def process_log_files(self) -> None: # brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4 self.auv_name = self.config["log_file"].split("/")[0].lower() self.process_log_file(self.config["log_file"]) + elif self.config.get("start") and self.config.get("end"): + # Process multiple log files within datetime range + log_files = self.log_file_list( + self.config["start"], self.config["end"], self.config.get("auv_name") + ) + if not log_files: + self.logger.warning( + "No log files found in datetime range %s to %s", + self.config["start"], + self.config["end"], + ) + return + + self.logger.info("Processing %d log files in datetime range", len(log_files)) + for log_file in log_files: + # Extract AUV name from path + self.auv_name = log_file.split("/")[0].lower() + self.logger.info("Processing log file: %s", log_file) + self.process_log_file(log_file) + else: + self.logger.error("Must provide either --log_file or both --start and --end arguments") + return def process_command_line(self): parser = argparse.ArgumentParser( @@ -986,6 +1130,23 @@ def process_command_line(self): action="store", help="For LRAUV class data - process only this log file", ) + parser.add_argument( + "--start", + action="store", + help="For LRAUV class data - start processing from this datetime " + "(YYYYMMDDTHHMMSS format)", + ) + parser.add_argument( + "--end", + action="store", + help="For LRAUV class data - end processing at this datetime (YYYYMMDDTHHMMSS format)", + ) + parser.add_argument( + "--auv_name", + action="store", + help="For LRAUV class data - restrict log file search to this AUV name " + "(e.g., brizo, ahi). If not specified, all AUVs will be searched.", + ) parser.add_argument( "--freq", action="store", @@ -1094,5 +1255,8 @@ def process_command_line(self): # Process based on arguments if args.log_file: proc.process_log_files() + elif args.start and args.end: + # Process LRAUV log files in datetime range + proc.process_log_files() else: proc.process_missions(2020) From 5ffb2eb713630d4bc058931f27c7d4fc2df6293f Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:08:08 -0800 Subject: [PATCH 084/121] Make INFO logs less verbose. --- src/data/nc42netcdfs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index da65f9ec..85438e4a 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -241,10 +241,10 @@ def _extract_root_group( vars_to_extract, _ = self._get_available_variables(src_dataset, root_parms) # Add debugging output for root group processing - self.logger.info("=== ROOT GROUP DEBUG ===") - self.logger.info("Available variables: %s", sorted(vars_to_extract)) - self.logger.info("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) - self.logger.info( + self.logger.debug("=== ROOT GROUP DEBUG ===") + self.logger.debug("Available variables: %s", sorted(vars_to_extract)) + self.logger.debug("Available dimensions: %s", sorted(src_dataset.dimensions.keys())) + self.logger.debug( "Available coordinate variables: %s", [v for v in sorted(src_dataset.variables.keys()) if v in src_dataset.dimensions], ) From af8f01f64f0479656c89e11e63c59de0d11bd7e9 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:10:52 -0800 Subject: [PATCH 085/121] Fixup the --verbose option. --- src/data/common_args.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/data/common_args.py b/src/data/common_args.py index e79b6198..dcfd2427 100644 --- a/src/data/common_args.py +++ b/src/data/common_args.py @@ -51,9 +51,12 @@ def get_core_parser(): ) parser.add_argument( "--verbose", + "-v", type=int, choices=range(3), default=0, + const=1, + nargs="?", help="Verbosity level: 0=WARN (default), 1=INFO, 2=DEBUG", ) From 9fd0bbf886b4c8211ffc0c2e96835851018f37e0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:13:18 -0800 Subject: [PATCH 086/121] Pass log_file as arguement as --start & --end doesn't set --log_file. --- src/data/align.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 21295e27..896e37f5 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -48,10 +48,15 @@ class Align_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def global_metadata(self): + def global_metadata(self, auv_name: str = "", mission: str = "", log_file: str = "") -> dict: # noqa: PLR0915 """Use instance variables to return a dictionary of metadata specific for the data that are written """ + # Support calling with self.args values and for + # either mission/vehicle or log_file as method args + auv_name = self.args.auv_name or auv_name + mission = self.args.mission or mission + log_file = self.args.log_file or log_file # Try to get actual host name, fall back to container name actual_hostname = os.getenv("HOST_NAME", gethostname()) repo = git.Repo(search_parent_directories=True) @@ -95,10 +100,9 @@ def global_metadata(self): metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." metadata["history"] = f"Created by {self.commandline} on {iso_now}" - if self.args.auv_name and self.args.mission: + if auv_name and mission: metadata["title"] = ( - f"Calibrated and aligned AUV sensor data from" - f" {self.args.auv_name} mission {self.args.mission}" + f"Calibrated and aligned AUV sensor data from {auv_name} mission {mission}" ) from_data = "calibrated data" metadata["source"] = ( @@ -114,10 +118,9 @@ def global_metadata(self): " and the coordinate variables aligned using MBARI's auv-python" " software." ) - elif self.args.log_file: + elif log_file: metadata["title"] = ( - f"Combined and aligned LRAUV instrument data from" - f" log file {Path(self.args.log_file)}" + f"Combined and aligned LRAUV instrument data from log file {Path(log_file)}" ) from_data = "combined data" metadata["source"] = ( @@ -145,7 +148,7 @@ def global_metadata(self): f" host {gethostname()}. Software available at" f" 'https://github.com/mbari-org/auv-python'" ) - elif self.args.log_file: + elif log_file: matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.combined_nc.attrs["summary"], @@ -384,6 +387,10 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 self.min_lon = np.inf self.max_lon = -np.inf + # Coordinates - use mapping from global variable_time_coord_mapping attribute + variable_time_coord_mapping = json.loads( + self.combined_nc.attrs.get("variable_time_coord_mapping", "{}") + ) # Find navigation coordinates from combined data - must be from universals group nav_coords = {} for coord_type in ["longitude", "latitude", "depth", "time"]: @@ -400,7 +407,7 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 try: lat_interp = interp1d( self.combined_nc[nav_coords["latitude"]] - .get_index("universals_time") + .get_index(variable_time_coord_mapping[nav_coords["latitude"]]) .view(np.int64) .tolist(), self.combined_nc[nav_coords["latitude"]].values, @@ -413,7 +420,7 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 lon_interp = interp1d( self.combined_nc[nav_coords["longitude"]] - .get_index("universals_time") + .get_index(variable_time_coord_mapping[nav_coords["longitude"]]) .view(np.int64) .tolist(), self.combined_nc[nav_coords["longitude"]].values, @@ -426,7 +433,7 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 depth_interp = interp1d( self.combined_nc[nav_coords["depth"]] - .get_index("universals_time") + .get_index(variable_time_coord_mapping[nav_coords["depth"]]) .view(np.int64) .tolist(), self.combined_nc[nav_coords["depth"]].values, @@ -532,10 +539,6 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 ) > pd.to_datetime(self.max_time): self.max_time = pd.to_datetime(self.aligned_nc[timevar][-1].values).tz_localize(UTC) - # Coordinates - use mapping from global variable_time_coord_mapping attribute - variable_time_coord_mapping = json.loads( - self.combined_nc.attrs.get("variable_time_coord_mapping", "{}") - ) time_coord = variable_time_coord_mapping.get(variable) depth_coord = ( time_coord[:-5] + "_depth" @@ -645,7 +648,7 @@ def write_combined_netcdf( vehicle = vehicle or self.args.auv_name out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_align.nc") - self.aligned_nc.attrs = self.global_metadata() + self.aligned_nc.attrs = self.global_metadata(log_file=log_file) self.logger.info("Writing aligned combined data to %s", out_fn) if out_fn.exists(): self.logger.debug("Removing existing file %s", out_fn) From e082d69d84e4e2aadfe710a89c9ca26f5899eaea Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:14:08 -0800 Subject: [PATCH 087/121] Pass in log_file and do plot before raising exception on error. --- src/data/AUV.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index cffa6fe8..9915a748 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -37,6 +37,7 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 auv_name: str = "", mission: str = "", max_sec_diff_at_end: int = 10, + log_file: str = "", create_plots: bool = False, # noqa: FBT001, FBT002 ) -> tuple[xr.DataArray, xr.DataArray, int, float]: """ @@ -169,16 +170,23 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 end_lon_diff, end_lat_diff, ) - logger.info( - "Fix this error by calling _range_qc_combined_nc() in " - "_navigation_process() and/or _gps_process() for %s %s", - auv_name, - mission, - ) + if log_file: + logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s", + log_file, + ) + logger.info("Run to get a plot: combine.py -v 1 --plot --log_file %s", log_file) + elif auv_name and mission: + logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s %s", + auv_name, + mission, + ) error_message = ( f"abs(end_lon_diff) ({end_lon_diff}) > 1 or abs(end_lat_diff) ({end_lat_diff}) > 1" ) - raise ValueError(error_message) if abs(end_sec_diff) > max_sec_diff_at_end: logger.warning( "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", @@ -276,12 +284,16 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 name="latitude", ) - # Optional plotting code + # Optional plotting code - raise error after opportunity to plot if create_plots: _create_nudge_plots( lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger ) + if error_message: + logger.error("Nudge positions error: %s", error_message) + raise ValueError(error_message) + return lon_nudged, lat_nudged, segment_count, segment_minsum From 787fe4421b0f1a188d4fecd185335a6b3ca7c12d Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:15:33 -0800 Subject: [PATCH 088/121] Get the correct time coordinates for lat & lon as some log files have different dimensions for lat/lon and depth. --- src/data/combine.py | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/data/combine.py b/src/data/combine.py index dc05ecec..bf6cdf89 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -581,18 +581,23 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) - def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: + def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = "") -> None: """Add nudged longitude and latitude variables to the combined dataset.""" try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( # For LRAUV data the nav positions are shifted by 1 to align with GPS fixes - nav_longitude=self.combined_nc["universals_longitude"].shift(universals_time=1), - nav_latitude=self.combined_nc["universals_latitude"].shift(universals_time=1), + nav_longitude=self.combined_nc["universals_longitude"].shift( + **{self.variable_time_coord_mapping["universals_longitude"]: 1} + ), + nav_latitude=self.combined_nc["universals_latitude"].shift( + **{self.variable_time_coord_mapping["universals_latitude"]: 1} + ), gps_longitude=self.combined_nc["nal9602_longitude_fix"], gps_latitude=self.combined_nc["nal9602_latitude_fix"], logger=self.logger, auv_name="", mission="", + log_file=log_file, max_sec_diff_at_end=max_sec_diff_at_end, create_plots=self.args.plot, ) @@ -607,7 +612,11 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: ) self.combined_nc["nudged_longitude"] = xr.DataArray( nudged_longitude, - coords=[self.combined_nc["universals_time"].to_numpy()], + coords=[ + self.combined_nc[ + self.variable_time_coord_mapping["universals_longitude"] + ].to_numpy() + ], dims={f"nudged_{TIME}"}, name="nudged_longitude", ) @@ -622,7 +631,9 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: } self.combined_nc["nudged_latitude"] = xr.DataArray( nudged_latitude, - coords=[self.combined_nc["universals_time"].to_numpy()], + coords=[ + self.combined_nc[self.variable_time_coord_mapping["universals_latitude"]].to_numpy() + ], dims={f"nudged_{TIME}"}, name="nudged_latitude", ) @@ -636,9 +647,9 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: ), } - def combine_groups(self): + def combine_groups(self, log_file: str = None) -> None: """Combine group files into a single NetCDF dataset with consolidated time coordinates.""" - log_file = self.args.log_file + log_file = self.args.log_file or log_file src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) self.summary_fields = set() @@ -665,20 +676,20 @@ def combine_groups(self): self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"]) # Write intermediate file for cf_xarray decoding - intermediate_file = self._intermediate_write_netcdf() + intermediate_file = self._intermediate_write_netcdf(log_file=log_file) with xr.open_dataset(intermediate_file, decode_cf=True) as ds: self.combined_nc = ds.load() # Add nudged coordinates - self._add_nudged_coordinates() + self._add_nudged_coordinates(log_file=log_file) # Clean up intermediate file Path(intermediate_file).unlink() - def _intermediate_write_netcdf(self) -> None: + def _intermediate_write_netcdf(self, log_file: str = None) -> None: """Write out an intermediate combined netCDF file so that data can be read using decode_cf=True for nudge_positions() to work with cf accessors.""" - log_file = self.args.log_file + log_file = self.args.log_file or log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined_intermediate.nc") @@ -687,7 +698,7 @@ def _intermediate_write_netcdf(self) -> None: if Path(out_fn).exists(): Path(out_fn).unlink() self.combined_nc.to_netcdf(out_fn) - self.logger.info( + self.logger.debug( "Data variables written: %s", ", ".join(sorted(self.combined_nc.variables)), ) @@ -697,8 +708,8 @@ def _intermediate_write_netcdf(self) -> None: ) return out_fn - def write_netcdf(self) -> None: - log_file = self.args.log_file + def write_netcdf(self, log_file: str = None) -> None: + log_file = self.args.log_file or log_file netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") @@ -707,7 +718,7 @@ def write_netcdf(self) -> None: if Path(out_fn).exists(): Path(out_fn).unlink() self.combined_nc.to_netcdf(out_fn) - self.logger.info( + self.logger.debug( "Data variables written: %s", ", ".join(sorted(self.combined_nc.variables)), ) From 5084b5f7660f04845dced21182337429f7ff5d66 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:16:04 -0800 Subject: [PATCH 089/121] Testing --start and --end for lrauv processing. --- .vscode/launch.json | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 48d7a853..83614e3d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -120,7 +120,8 @@ "console": "integratedTerminal", "justMyCode": false, //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] }, { "name": "3.0 - align.py", @@ -312,9 +313,9 @@ //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2020.337.00", "--clobber"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2008.010.10"] //"args": ["-v", "2", "--mission", "2004.029.03", "--noinput", "--no_cleanup"], - //"args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], + "args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], //"args": ["-v", "1", "--mission", "2010.151.04", "--noinput", "--no_cleanup", "--clobber"], - "args": ["-v", "1", "--mission", "2025.316.02", "--noinput", "--no_cleanup", "--add_seconds", "619315200"], + //"args": ["-v", "1", "--mission", "2025.316.02", "--noinput", "--no_cleanup", "--add_seconds", "619315200"], }, { @@ -335,8 +336,12 @@ //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] - //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901", "--end", "20121101", "--noinput"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] + // Has different universals time coodinates for longitude/latitude and depth + "args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + // No nudged latitude and longitude variables + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"] }, ] From 8078ee8e7d7881c28b92af1aff36735a4d05ebd1 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 21 Nov 2025 17:21:39 -0800 Subject: [PATCH 090/121] Initialize error_message. --- src/data/AUV.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/data/AUV.py b/src/data/AUV.py index 9915a748..87aa2b65 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -116,6 +116,7 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 MIN_SEGMENT_LENGTH = 10 seg_count = 0 seg_minsum = 0 + error_message = "" for i in range(len(lat_fix) - 1): # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes segi = np.where( From 712dc3d5b3f65ff1797fe1425def5796e75ace77 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 09:59:07 -0800 Subject: [PATCH 091/121] Reorder missions_to_check. --- src/data/calibrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index f06c1b69..4833568e 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -1615,6 +1615,7 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 "2007.134.09", "2010.293.00", "2011.116.00", + "2011.166.00", "2013.227.00", "2016.348.00", "2017.121.00", @@ -1622,7 +1623,6 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 "2017.297.00", "2017.347.00", "2017.304.00", - "2011.166.00", } if self.args.mission in missions_to_check: self.logger.info( From 2a82b29e80606ce461d6c2c7969f033a18bf4f6e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 10:15:18 -0800 Subject: [PATCH 092/121] Add _initial_coordinate_qc() and fix the renaming of _qced variables. --- .vscode/launch.json | 10 +++++----- src/data/combine.py | 46 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 83614e3d..c987e2a3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -14,7 +14,7 @@ //"args": ["--auv_name", "i2map", "--mission", "2020.055.01", "--noinput", "--local", "-v", "2", "--clobber"] //"args": ["--auv_name", "Dorado389", "--mission", "2020.245.00", "--noinput", "-v", "2", "--portal", "http://stoqs.mbari.org:8080/auvdata/v1", "--clobber"] //"args": ["--auv_name", "Dorado389", "--mission", "2020.245.00", "--noinput", "-v"] - //"args": ["--auv_name", "Dorado389", "--mission", "2017.297.00", "--local", "-v", "2"] + "args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "2", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs"] //"args": ["--auv_name", "Dorado389", "--start", "20190701", "--end", "20191230", "-v", "2"] //"args": ["--auv_name", "i2map", "--mission", "2021.062.01", "--noinput", "-v", "1"] //"args": ["--auv_name", "dorado", "--mission", "2021.109.00", "--noinput", "-v"] @@ -26,7 +26,7 @@ //"args": ["--auv_name", "dorado", "--mission", "2010.265.00", "--noinput", "-v"] //"args": ["--auv_name", "dorado", "--mission", "2023.324.00", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs"] // Mission suffering from GPS Rollover bug. Add 1024 * 7 * 24 * 3600 = 619315200 seconds - "args": ["--auv_name", "dorado", "--mission", "2025.316.02", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs", "--add_seconds", "619315200" ] + //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs", "--add_seconds", "619315200" ] }, { "name": "1.1 - lopcToNetCDF", @@ -84,7 +84,7 @@ //"args": ["--auv_name", "dorado", "--mission", "2010.181.00", "--plot", "first1000", "-v", "1"] // OverflowError: time values outside range of 64 bit signed integers in calibrate.py:413 //"args": ["--auv_name", "dorado", "--mission", "2017.304.00", "--plot", "first1000", "-v", "1"] - //"args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "1"] + "args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "1"] //"args": ["--auv_name", "i2map", "--mission", "2022.094.01", "-v", "2"] //"args": ["--auv_name", "i2map", "--mission", "2018.025.00", "-v", "2"] //"args": ["--auv_name", "dorado", "--mission", "2017.248.01", "-v", "1"] @@ -102,7 +102,7 @@ //"args": ["--auv_name", "dorado", "--mission", "2018.079.00", "-v", "1"] //"args": ["--auv_name", "i2map", "--mission", "2018.348.01", "-v", "2"] //"args": ["--auv_name", "dorado", "--mission", "2023.324.00", "-v", "1", "--plot", "first10000"] - "args": ["--auv_name", "dorado", "--mission", "2022.201.00", "-v", "1", "--plot", "first10000"] + //"args": ["--auv_name", "dorado", "--mission", "2022.201.00", "-v", "1", "--plot", "first10000"] }, { "name": "2.1 - Test hs2_proc.py (its unit tests)", @@ -120,7 +120,7 @@ "console": "integratedTerminal", "justMyCode": false, //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] }, { diff --git a/src/data/combine.py b/src/data/combine.py index bf6cdf89..46f1c3fc 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -213,13 +213,19 @@ def _range_qc_combined_nc( # noqa: C901, PLR0912 self.combined_nc[f"{var}_qced"] = ( self.combined_nc[var] .drop_isel({coord: out_of_range_indices}) - .rename({f"{instrument}_time": f"{instrument}_time_qced"}) + .rename({f"{coord}": f"{coord}_qced"}) + .rename(f"{var}_qced") ) self.combined_nc = self.combined_nc.drop_vars(inst_vars) for var in inst_vars: self.logger.debug("Renaming %s_qced to %s", var, var) - self.combined_nc[var] = self.combined_nc[f"{var}_qced"].rename( - {f"{coord}_qced": coord}, + coord = next(iter(self.combined_nc[f"{var}_qced"].coords)) + self.combined_nc[var] = ( + self.combined_nc[f"{var}_qced"] + .rename( + {f"{coord}": coord[:-5]}, # Remove '_qced' suffix from coord name + ) + .rename(var) ) qced_vars = [f"{var}_qced" for var in inst_vars] self.combined_nc = self.combined_nc.drop_vars(qced_vars) @@ -581,8 +587,42 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) + def _initial_coordinate_qc(self, log_file: str = "") -> None: + """Perform initial QC on core coordinate variables for specific log files.""" + log_file = log_file or self.args.log_file + if log_file in ( + "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", + ): + self.logger.info("Performing initial coordinate QC for %s", self.args.log_file) + self._range_qc_combined_nc( + instrument="universals", + variables=[ + "universals_longitude", + "universals_latitude", + ], + ranges={ + "universals_longitude": Range(-123.5, -121.5), + "universals_latitude": Range(35.0, 37.0), + }, + set_to_nan=False, + ) + self._range_qc_combined_nc( + instrument="nal9602", + variables=[ + "nal9602_longitude_fix", + "nal9602_latitude_fix", + ], + ranges={ + "nal9602_longitude_fix": Range(-123.5, -121.5), + "nal9602_latitude_fix": Range(35.0, 37.0), + }, + set_to_nan=False, + ) + def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = "") -> None: """Add nudged longitude and latitude variables to the combined dataset.""" + log_file = log_file or self.args.log_file + self._initial_coordinate_qc(log_file=log_file) try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( # For LRAUV data the nav positions are shifted by 1 to align with GPS fixes From e9cf8d9986d00566641d99a895d0ac10479393f0 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 10:18:07 -0800 Subject: [PATCH 093/121] Undo the shift of 1 of dead reckoned positions in relation to gps. --- .vscode/launch.json | 4 ++-- src/data/combine.py | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index c987e2a3..4d79882c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -120,8 +120,8 @@ "console": "integratedTerminal", "justMyCode": false, //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] - "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] + //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] }, { "name": "3.0 - align.py", diff --git a/src/data/combine.py b/src/data/combine.py index 46f1c3fc..9141192e 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -625,13 +625,8 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = self._initial_coordinate_qc(log_file=log_file) try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( - # For LRAUV data the nav positions are shifted by 1 to align with GPS fixes - nav_longitude=self.combined_nc["universals_longitude"].shift( - **{self.variable_time_coord_mapping["universals_longitude"]: 1} - ), - nav_latitude=self.combined_nc["universals_latitude"].shift( - **{self.variable_time_coord_mapping["universals_latitude"]: 1} - ), + nav_longitude=self.combined_nc["universals_longitude"], + nav_latitude=self.combined_nc["universals_latitude"], gps_longitude=self.combined_nc["nal9602_longitude_fix"], gps_latitude=self.combined_nc["nal9602_latitude_fix"], logger=self.logger, From cb9d437cb5cd3f5ff84c23cd9367d01dc435f7b3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 13:31:39 -0800 Subject: [PATCH 094/121] Implement constructor-based architecture. Methods now use "self.attribute" consistently instead of optional arguments with fallbacks. --- .vscode/launch.json | 6 +- src/data/align.py | 114 ++++++++++++------- src/data/archive.py | 86 +++++++++----- src/data/calibrate.py | 219 +++++++++++++++++++++--------------- src/data/combine.py | 67 +++++++---- src/data/conftest.py | 26 +++-- src/data/create_products.py | 69 ++++++++---- src/data/logs2netcdfs.py | 188 +++++++++++++++++++++++-------- src/data/lopcMEP.py | 4 +- src/data/lopcToNetCDF.py | 4 +- src/data/nc42netcdfs.py | 35 +++++- src/data/process.py | 144 ++++++++++++++++-------- src/data/resample.py | 102 +++++++++++------ src/data/usblToNetCDF.py | 4 +- 14 files changed, 711 insertions(+), 357 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 4d79882c..82e4948c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -301,7 +301,7 @@ //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2007", "--end_year", "2007", "--create_products", "--num_cores", "1", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2007", "--end_year", "2007", "--start_yd", "171", "--end_yd", "171", "--num_cores", "1", "--create_products", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2008.261.01", "--create_products", "--archive", "--archive_only_products"] - //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2011", "--end_year", "2011", "--start_yd", "158", "--end_yd", "164", "--num_cores", "1"] + "args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2011", "--end_year", "2011", "--start_yd", "158", "--end_yd", "164", "--num_cores", "1"] //"args": ["-v", "1", "--noinput", "--start_year", "2016", "--end_year", "2016", "--start_yd", "270", "--end_yd", "270", "--num_cores", "1", "--create_products", "--archive", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--num_cores", "1", "--mission", "2023.285.01"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2018.079.00"] @@ -313,7 +313,7 @@ //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2020.337.00", "--clobber"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2008.010.10"] //"args": ["-v", "2", "--mission", "2004.029.03", "--noinput", "--no_cleanup"], - "args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], + //"args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], //"args": ["-v", "1", "--mission", "2010.151.04", "--noinput", "--no_cleanup", "--clobber"], //"args": ["-v", "1", "--mission", "2025.316.02", "--noinput", "--no_cleanup", "--add_seconds", "619315200"], @@ -339,6 +339,8 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] // Has different universals time coodinates for longitude/latitude and depth "args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] + // Conflicting sizes for nudged_time and data + //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--no_cleanup" //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // No nudged latitude and longitude variables //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"] diff --git a/src/data/align.py b/src/data/align.py index 896e37f5..497d47fd 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -48,15 +48,44 @@ class Align_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def global_metadata(self, auv_name: str = "", mission: str = "", log_file: str = "") -> dict: # noqa: PLR0915 + # noqa: PLR0913 - Many parameters needed for initialization + def __init__( # noqa: PLR0913 + self, + auv_name: str, + mission: str, + base_path: str, + log_file: str = "", + plot: str = None, + verbose: int = 0, + commandline: str = "", + ) -> None: + """Initialize Align_NetCDF with explicit parameters. + + Args: + auv_name: Name of the AUV (e.g., 'Dorado389', 'i2map', 'tethys') + mission: Mission identifier (e.g., '2011.256.02') + base_path: Base directory path for data + log_file: Optional LRAUV log file path for log-based processing + plot: Optional plot specification + verbose: Verbosity level (0=WARN, 1=INFO, 2=DEBUG) + commandline: Command line string for metadata + """ + self.auv_name = auv_name + self.mission = mission + self.base_path = base_path + self.log_file = log_file + self.plot = plot + self.verbose = verbose + self.commandline = commandline + self.logger.setLevel(self._log_levels[verbose]) + + def global_metadata(self) -> dict: # noqa: PLR0915 """Use instance variables to return a dictionary of metadata specific for the data that are written """ - # Support calling with self.args values and for - # either mission/vehicle or log_file as method args - auv_name = self.args.auv_name or auv_name - mission = self.args.mission or mission - log_file = self.args.log_file or log_file + auv_name = self.auv_name + mission = self.mission + log_file = self.log_file # Try to get actual host name, fall back to container name actual_hostname = os.getenv("HOST_NAME", gethostname()) repo = git.Repo(search_parent_directories=True) @@ -137,7 +166,7 @@ def global_metadata(self, auv_name: str = "", mission: str = "", log_file: str = " using MBARI's auv-python software." ) # Append location of original data files to summary - if self.args.auv_name and self.args.mission: + if self.auv_name and self.mission: matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.calibrated_nc.attrs["summary"], @@ -164,15 +193,14 @@ def global_metadata(self, auv_name: str = "", mission: str = "", log_file: str = return metadata - def process_cal(self, vehicle: str = "", name: str = "", log_file: str = "") -> None: # noqa: C901, PLR0912, PLR0915 - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - if name and vehicle: - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) - src_file = Path(netcdfs_dir, f"{vehicle}_{name}_cal.nc") - elif log_file: - netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(log_file).parent}") - src_file = Path(netcdfs_dir, f"{Path(log_file).stem}_cal.nc") + def process_cal(self) -> Path: # noqa: C901, PLR0912, PLR0915 + """Process calibrated netCDF file using instance attributes.""" + if self.mission and self.auv_name: + netcdfs_dir = Path(self.base_path, self.auv_name, MISSIONNETCDFS, self.mission) + src_file = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_cal.nc") + elif self.log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(self.log_file).parent}") + src_file = Path(netcdfs_dir, f"{Path(self.log_file).stem}_cal.nc") else: msg = "Must provide either mission and vehicle or log_file" raise ValueError(msg) @@ -370,10 +398,10 @@ def process_cal(self, vehicle: str = "", name: str = "", log_file: str = "") -> return netcdfs_dir - def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR0915 + def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915 """Process combined LRAUV data from *_combined.nc files created by combine.py""" - netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(log_file).parent}") - src_file = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") + netcdfs_dir = Path(BASE_LRAUV_PATH, f"{Path(self.log_file).parent}") + src_file = Path(netcdfs_dir, f"{Path(self.log_file).stem}_combined.nc") self.combined_nc = xr.open_dataset(src_file) self.logger.info("Processing %s", src_file) @@ -636,19 +664,15 @@ def process_combined(self, log_file: str) -> None: # noqa: C901, PLR0912, PLR09 return netcdfs_dir - def write_combined_netcdf( - self, netcdfs_dir, vehicle: str = "", name: str = "", log_file: str = "" - ) -> None: + def write_combined_netcdf(self, netcdfs_dir: Path) -> None: """Write aligned combined data to NetCDF file""" - if log_file: + if self.log_file: # For LRAUV log files, use the log file stem for output name - out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_align.nc") + out_fn = Path(netcdfs_dir, f"{Path(self.log_file).stem}_align.nc") else: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_align.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_align.nc") - self.aligned_nc.attrs = self.global_metadata(log_file=log_file) + self.aligned_nc.attrs = self.global_metadata() self.logger.info("Writing aligned combined data to %s", out_fn) if out_fn.exists(): self.logger.debug("Removing existing file %s", out_fn) @@ -659,11 +683,10 @@ def write_combined_netcdf( ", ".join(sorted(self.aligned_nc.variables)), ) - def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + def write_netcdf(self, netcdfs_dir: Path) -> None: + """Write aligned netCDF file using instance attributes.""" self.aligned_nc.attrs = self.global_metadata() - out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_align.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_align.nc") self.logger.info("Writing aligned data to %s", out_fn) if out_fn.exists(): self.logger.debug("Removing file %s", out_fn) @@ -701,21 +724,32 @@ def process_command_line(self): help="Create intermediate plots to validate data operations.", ) - self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) - self.commandline = " ".join(sys.argv) + args = parser.parse_args() + + # Reinitialize object with parsed arguments + self.__init__( + auv_name=args.auv_name, + mission=args.mission, + base_path=args.base_path, + log_file=args.log_file if hasattr(args, "log_file") else None, + plot=args.plot if hasattr(args, "plot") else False, + verbose=args.verbose, + commandline=" ".join(sys.argv), + ) + self.logger.setLevel(self._log_levels[args.verbose]) if __name__ == "__main__": - align_netcdf = Align_NetCDF() + # Create with default values for command-line usage + align_netcdf = Align_NetCDF(auv_name="", mission="", base_path="") align_netcdf.process_command_line() p_start = time.time() - if align_netcdf.args.log_file: + if align_netcdf.log_file: # Process combined LRAUV data using log_file - netcdf_dir = align_netcdf.process_combined(log_file=align_netcdf.args.log_file) - align_netcdf.write_combined_netcdf(netcdf_dir, log_file=align_netcdf.args.log_file) - elif align_netcdf.args.auv_name and align_netcdf.args.mission: + netcdf_dir = align_netcdf.process_combined() + align_netcdf.write_combined_netcdf(netcdf_dir) + elif align_netcdf.auv_name and align_netcdf.mission: # Process calibrated data using auv_name and mission netcdf_dir = align_netcdf.process_cal() align_netcdf.write_netcdf(netcdf_dir) diff --git a/src/data/archive.py b/src/data/archive.py index 69d43f31..78222899 100755 --- a/src/data/archive.py +++ b/src/data/archive.py @@ -36,7 +36,44 @@ class Archiver: _handler.setFormatter(AUV_NetCDF._formatter) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def __init__(self, add_handlers=True): # noqa: FBT002 + def __init__( # noqa: PLR0913 + self, + add_handlers: bool = True, # noqa: FBT001, FBT002 + auv_name: str = None, + mission: str = None, + clobber: bool = False, # noqa: FBT001, FBT002 + resample: bool = False, # noqa: FBT001, FBT002 + flash_threshold: float = None, + archive_only_products: bool = False, # noqa: FBT001, FBT002 + create_products: bool = False, # noqa: FBT001, FBT002 + verbose: int = 0, + commandline: str = "", + ): + """Initialize Archiver with explicit parameters. + + Args: + add_handlers: Whether to add logging handlers + auv_name: Name of the AUV vehicle + mission: Mission identifier + clobber: Overwrite existing files + resample: Resample flag + flash_threshold: Flash detection threshold + archive_only_products: Archive only product files + create_products: Create product files flag + verbose: Verbosity level (0-2) + commandline: Command line string for tracking + """ + self.auv_name = auv_name + self.mission = mission + self.clobber = clobber + self.resample = resample + self.flash_threshold = flash_threshold + self.archive_only_products = archive_only_products + self.create_products = create_products + self.verbose = verbose + self.commandline = commandline + self.mount_dir = None # Will be set by caller + if add_handlers: self.logger.addHandler(self._handler) @@ -56,29 +93,26 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: self.logger.exception("%s not found", surveys_dir) self.logger.info("Is smb://atlas.shore.mbari.org/AUVCTD mounted?") sys.exit(1) - year = self.args.mission.split(".")[0] + year = self.mission.split(".")[0] surveynetcdfs_dir = Path(surveys_dir, year, "netcdf") # To avoid "fchmod failed: Permission denied" message use shutil.copyfile - if not self.args.archive_only_products: + if not self.archive_only_products: self.logger.info("Archiving %s files to %s", nc_file_base, surveynetcdfs_dir) # Copy netCDF files to AUVCTD/surveys/YYYY/netcdf - if hasattr(self.args, "flash_threshold"): - if self.args.flash_threshold and self.args.resample: - ft_ending = f"{freq}_ft{self.args.flash_threshold:.0E}.nc".replace( - "E+", - "E", - ) - ftypes = (ft_ending,) - else: - ftypes = (f"{freq}.nc", "cal.nc", "align.nc") + if self.flash_threshold and self.resample: + ft_ending = f"{freq}_ft{self.flash_threshold:.0E}.nc".replace( + "E+", + "E", + ) + ftypes = (ft_ending,) else: ftypes = (f"{freq}.nc", "cal.nc", "align.nc") for ftype in ftypes: src_file = Path(f"{nc_file_base}_{ftype}") dst_file = Path(surveynetcdfs_dir, src_file.name) - if self.args.clobber: + if self.clobber: if dst_file.exists(): self.logger.info("Removing %s", dst_file) dst_file.unlink() @@ -91,15 +125,15 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: src_file.name, ) - if not hasattr(self.args, "resample") or not self.args.resample: + if not self.resample: # Copy intermediate files to AUVCTD/missionnetcdfs/YYYY/YYYYJJJ - YYYYJJJ = "".join(self.args.mission.split(".")[:2]) + YYYYJJJ = "".join(self.mission.split(".")[:2]) missionnetcdfs_dir = Path( AUVCTD_VOL, MISSIONNETCDFS, year, YYYYJJJ, - self.args.mission, + self.mission, ) Path(missionnetcdfs_dir).mkdir(parents=True, exist_ok=True) src_dir = Path(nc_file_base).parent @@ -107,7 +141,7 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: # so that lopc.nc is archived along with the other netcdf versions of the log files. for log in [*LOG_FILES, "lopc.log"]: src_file = Path(src_dir, f"{log.replace('.log', '')}.nc") - if self.args.clobber: + if self.clobber: if src_file.exists(): shutil.copyfile(src_file, missionnetcdfs_dir / src_file.name) self.logger.info("copyfile %s %s done.", src_file, missionnetcdfs_dir) @@ -122,14 +156,14 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: for src_dir, dst_dir in ((MISSIONODVS, "odv"), (MISSIONIMAGES, "images")): src_dir = Path( # noqa: PLW2901 BASE_PATH, - self.args.auv_name, + self.auv_name, src_dir, - self.args.mission, + self.mission, ) if Path(src_dir).exists(): dst_dir = Path(surveys_dir, year, dst_dir) # noqa: PLW2901 Path(dst_dir).mkdir(parents=True, exist_ok=True) - if self.args.clobber: + if self.clobber: # Copy files individually to avoid permission issues with copytree. # This will not copy subdirectories, but we don't expect any. for src_file in src_dir.glob("*"): @@ -151,7 +185,7 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: ) else: self.logger.debug("%s not found", src_dir) - if self.args.create_products or (hasattr(self.args, "resample") and self.args.resample): + if self.create_products or self.resample: # Do not copy processing.log file if only partial processing was done self.logger.info( "Partial processing, not archiving %s", @@ -162,7 +196,7 @@ def copy_to_AUVTCD(self, nc_file_base: Path, freq: str = FREQ) -> None: # noqa: src_file = Path(f"{nc_file_base}_{LOG_NAME}") dst_file = Path(surveynetcdfs_dir, src_file.name) if src_file.exists(): - if self.args.clobber: + if self.clobber: self.logger.info("copyfile %s %s", src_file, surveynetcdfs_dir) shutil.copyfile(src_file, dst_file) self.logger.info("copyfile %s %s done.", src_file, surveynetcdfs_dir) @@ -187,7 +221,7 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, sys.exit(1) for src_file in sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")): dst_file = Path(dst_dir, src_file.name) - if self.args.clobber: + if self.clobber: if dst_file.exists(): self.logger.info("Removing %s", dst_file) dst_file.unlink() @@ -202,7 +236,7 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, for ftype in (f"{freq}.nc", "combined.nc", "align.nc"): src_file = Path(src_dir, f"{Path(log_file).stem}_{ftype}") dst_file = Path(dst_dir, src_file.name) - if self.args.clobber: + if self.clobber: if dst_file.exists(): self.logger.info("Removing %s", dst_file) dst_file.unlink() @@ -218,7 +252,7 @@ def copy_to_LRAUV(self, log_file: str, freq: str = FREQ) -> None: # noqa: C901, src_file = Path(src_dir, f"{Path(log_file).stem}_{LOG_NAME}") dst_file = Path(dst_dir, src_file.name) if src_file.exists(): - if self.args.clobber: + if self.clobber: self.logger.info("copyfile %s %s", src_file, dst_dir) shutil.copyfile(src_file, dst_file) self.logger.info("copyfile %s %s done.", src_file, dst_dir) @@ -258,7 +292,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 4833568e..68be6a61 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -600,6 +600,49 @@ class Calibrate_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + # noqa: PLR0913 - Many parameters needed for initialization + def __init__( # noqa: PLR0913 + self, + auv_name: str, + mission: str, + base_path: str, + calibration_dir: str, + plot: str = None, + verbose: int = 0, + commandline: str = "", + local: bool = False, # noqa: FBT001, FBT002 + noinput: bool = False, # noqa: FBT001, FBT002 + clobber: bool = False, # noqa: FBT001, FBT002 + noreprocess: bool = False, # noqa: FBT001, FBT002 + ) -> None: + """Initialize Calibrate_NetCDF with explicit parameters. + + Args: + auv_name: Name of the AUV + mission: Mission identifier + base_path: Base directory path for data + calibration_dir: Directory containing calibration files + plot: Optional plot specification + verbose: Verbosity level (0=WARN, 1=INFO, 2=DEBUG) + commandline: Command line string for metadata + local: Use local data only (no downloads) + noinput: Don't prompt for user input + clobber: Overwrite existing files + noreprocess: Skip reprocessing if output exists + """ + self.auv_name = auv_name + self.mission = mission + self.base_path = base_path + self.calibration_dir = calibration_dir + self.plot = plot + self.verbose = verbose + self.commandline = commandline + self.local = local + self.noinput = noinput + self.clobber = clobber + self.noreprocess = noreprocess + self.logger.setLevel(self._log_levels[verbose]) + def global_metadata(self): """Use instance variables to return a dictionary of metadata specific for the data that are written @@ -631,7 +674,7 @@ def global_metadata(self): metadata["history"] = f"Created by {self.commandline} on {iso_now}" metadata["title"] = ( - f"Calibrated AUV sensor data from {self.args.auv_name} mission {self.args.mission}" + f"Calibrated AUV sensor data from {self.auv_name} mission {self.mission}" ) metadata["summary"] = ( "Observational oceanographic data obtained from an Autonomous" @@ -665,7 +708,7 @@ def _get_file(self, download_url, local_filename, session): with Path(local_filename).open("wb") as handle: for chunk in resp.content.iter_chunked(1024): handle.write(chunk) - if self.args.verbose > 1: + if self.verbose > 1: self.logger.info("%s(done)", Path(local_filename).name) def _define_sensor_info(self, start_datetime): @@ -830,7 +873,7 @@ class SensorOffset(NamedTuple): ) # Changes over time - if self.args.auv_name.lower().startswith("dorado"): + if self.auv_name.lower().startswith("dorado"): self.sinfo["depth"]["sensor_offset"] = None if start_datetime >= datetime(2007, 4, 30, tzinfo=UTC): # First missions with 10 Gulpers: 2007.120.00 & 2007.120.01 @@ -1199,7 +1242,7 @@ def _read_oxy_coeffs( # noqa: C901, PLR0912, PLR0915 self.logger.debug( "Finding calibration file for oxygen serial number = %s on mission %s", serial_number, - self.args.mission, + self.mission, ) safe_calibration_dir = Path(self.calibration_dir).resolve() @@ -1244,7 +1287,7 @@ def _read_oxy_coeffs( # noqa: C901, PLR0912, PLR0915 self.logger.info( "Breaking from loop as %s is after %s with mission_start=%s", cal_dates[cal_date], - self.args.mission, + self.mission, mission_start, ) break @@ -1254,14 +1297,14 @@ def _read_oxy_coeffs( # noqa: C901, PLR0912, PLR0915 self.logger.info( "File %s is just before %s with mission_start=%s", cal_dates[cal_date_to_use], - self.args.mission, + self.mission, mission_start, ) else: self.logger.info( "File %s is the first calibration file, but is after %s with mission_start=%s", cal_dates[cal_date_to_use], - self.args.mission, + self.mission, mission_start, ) @@ -1372,7 +1415,7 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -1514,7 +1557,7 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 # - all missions in Monterey Bay (Zone 10) self.logger.info( "Converting from Easting/Northing to lat/lon for mission %s", - self.args.mission, + self.mission, ) proj = pyproj.Proj(proj="utm", zone=10, ellps="WGS84", radians=False) navlons, navlats = proj( @@ -1571,14 +1614,14 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 # pdIndx = find(Nav.depth > 1); # posDepths = Nav.depth(pdIndx); pos_depths = np.where(self.combined_nc["navigation_depth"].to_numpy() > 1) - if self.args.mission in {"2013.301.02", "2009.111.00"}: + if self.mission in {"2013.301.02", "2009.111.00"}: self.logger.info("Bypassing Nav QC depth check") maxGoodDepth = 1250 else: if pos_depths[0].size == 0: self.logger.warning( "No positive depths found in %s/navigation.nc", - self.args.mission, + self.mission, ) maxGoodDepth = 1250 else: @@ -1586,15 +1629,15 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 self.logger.debug("median of positive valued depths = %s", np.median(pos_depths)) if maxGoodDepth < 0: maxGoodDepth = 100 # Fudge for the 2009.272.00 mission where median was -0.1347! - if self.args.mission == "2010.153.01": + if self.mission == "2010.153.01": maxGoodDepth = 1250 # Fudge for 2010.153.01 where the depth was bogus, about 1.3 self.logger.debug("Finding depths less than '%s' and times > 0'", maxGoodDepth) - if self.args.mission == "2010.172.01": + if self.mission == "2010.172.01": self.logger.info( "Performing special QC for %s/navigation.nc", - self.args.mission, + self.mission, ) self._range_qc_combined_nc( instrument="navigation", @@ -1624,9 +1667,9 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 "2017.347.00", "2017.304.00", } - if self.args.mission in missions_to_check: + if self.mission in missions_to_check: self.logger.info( - "Removing points outside of Monterey Bay for %s/navigation.nc", self.args.mission + "Removing points outside of Monterey Bay for %s/navigation.nc", self.mission ) self._range_qc_combined_nc( instrument="navigation", @@ -1636,10 +1679,10 @@ def _navigation_process(self, sensor): # noqa: C901, PLR0912, PLR0915 "navigation_latitude": Range(36, 37), }, ) - if self.args.mission == "2010.284.00": + if self.mission == "2010.284.00": self.logger.info( "Removing points outside of time range for %s/navigation.nc", - self.args.mission, + self.mission, ) self._range_qc_combined_nc( instrument="navigation", @@ -1672,8 +1715,8 @@ def _nudge_pos(self, max_sec_diff_at_end=10): gps_longitude=lon_fix, gps_latitude=lat_fix, logger=self.logger, - auv_name=self.args.auv_name, - mission=self.args.mission, + auv_name=self.auv_name, + mission=self.mission, max_sec_diff_at_end=max_sec_diff_at_end, create_plots=False, ) @@ -1691,27 +1734,27 @@ def _gps_process(self, sensor): self.logger.exception("%s", e) # noqa: TRY401 return except AttributeError: - if self.args.mission == "2010.151.04": + if self.mission == "2010.151.04": # Gulf of Mexico mission - use data from usbl.dat file(s) usbl_file = Path( - self.args.base_path, - self.args.auv_name, + self.base_path, + self.auv_name, MISSIONNETCDFS, - self.args.mission, + self.mission, "usbl.nc", ) if not usbl_file.exists(): # Copy from archive AUVCTD/missionnetcdfs/YYYY/YYYYJJJ the usbl.nc file from archive import AUVCTD_VOL - year = self.args.mission.split(".")[0] - YYYYJJJ = "".join(self.args.mission.split(".")[:2]) + year = self.mission.split(".")[0] + YYYYJJJ = "".join(self.mission.split(".")[:2]) missionnetcdfs_dir = Path( AUVCTD_VOL, MISSIONNETCDFS, year, YYYYJJJ, - self.args.mission, + self.mission, ) shutil.copyfile( Path(missionnetcdfs_dir, "usbl.nc"), @@ -1732,7 +1775,7 @@ def _gps_process(self, sensor): else: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -1782,7 +1825,7 @@ def _gps_process(self, sensor): "units": "degrees_east", "comment": f"longitude from {source}", } - if self.args.mission in { + if self.mission in { "2004.345.00", "2005.240.00", "2007.134.09", @@ -1797,9 +1840,7 @@ def _gps_process(self, sensor): "2017.304.00", "2011.166.00", }: - self.logger.info( - "Removing points outside of Monterey Bay for %s/gps.nc", self.args.mission - ) + self.logger.info("Removing points outside of Monterey Bay for %s/gps.nc", self.mission) self._range_qc_combined_nc( instrument="gps", variables=vars_to_qc, @@ -1888,12 +1929,12 @@ def _depth_process(self, sensor, latitude=36, cutoff_freq=1): # noqa: PLR0915 "2012.258.00": Range(-1, 160), # Shallow Monterey Bay "2012.270.04": Range(-1, 30), # Shallow Monterey Bay } - if self.args.mission in mission_depth_ranges: - valid_depth_range = mission_depth_ranges[self.args.mission] + if self.mission in mission_depth_ranges: + valid_depth_range = mission_depth_ranges[self.mission] self.logger.info( - "Removing depths outside of valid_depth_range=%s for self.args.mission=%s", + "Removing depths outside of valid_depth_range=%s for self.mission=%s", valid_depth_range, - self.args.mission, + self.mission, ) out_of_range = np.where( (depths < valid_depth_range.min) | (depths > valid_depth_range.max), @@ -1941,20 +1982,19 @@ def _depth_process(self, sensor, latitude=36, cutoff_freq=1): # noqa: PLR0915 b = signal.windows.boxcar(a) depth_filtpres_boxcar = signal.filtfilt(b, a, pres) pres_plot = True # Set to False for debugging other plots - if self.args.plot and pres_plot: + if self.plot and pres_plot: # Use Pandas to plot multiple columns of data # to validate that the filtering works as expected pbeg = 0 pend = len(depths.get_index("time")) - if self.args.plot.startswith("first"): - pend = int(self.args.plot.split("first")[1]) + if self.plot.startswith("first"): + pend = int(self.plot.split("first")[1]) df_plot = pd.DataFrame(index=depths.get_index("time")[pbeg:pend]) df_plot["pres"] = pres[pbeg:pend] df_plot["depth_filtpres_butter"] = depth_filtpres_butter[pbeg:pend] df_plot["depth_filtpres_boxcar"] = depth_filtpres_boxcar[pbeg:pend] title = ( - f"First {pend} points from" - f" {self.args.mission}/{self.sinfo[sensor]['data_filename']}" + f"First {pend} points from" f" {self.mission}/{self.sinfo[sensor]['data_filename']}" ) ax = df_plot.plot(title=title, figsize=(18, 6)) ax.grid("on") @@ -2144,19 +2184,18 @@ def _hs2_process(self, sensor, logs_dir): # noqa: C901, PLR0912, PLR0915 red_bs = red_bs[:][~mfl.mask] red_blue_plot = True # Set to False for debugging other plots - if self.args.plot and red_blue_plot: + if self.plot and red_blue_plot: # Use Pandas to more easiily plot multiple columns of data pbeg = 0 pend = len(blue_bs.get_index("hs2_time")) - if self.args.plot.startswith("first"): - pend = int(self.args.plot.split("first")[1]) + if self.plot.startswith("first"): + pend = int(self.plot.split("first")[1]) df_plot = pd.DataFrame(index=blue_bs.get_index("hs2_time")[pbeg:pend]) df_plot["blue_bs"] = blue_bs[pbeg:pend] df_plot["red_bs"] = red_bs[pbeg:pend] ## df_plot["fl"] = fl[pbeg:pend] title = ( - f"First {pend} points from" - f" {self.args.mission}/{self.sinfo[sensor]['data_filename']}" + f"First {pend} points from" f" {self.mission}/{self.sinfo[sensor]['data_filename']}" ) ax = df_plot.plot(title=title, figsize=(18, 6), ylim=(-0.003, 0.004)) ax.grid("on") @@ -2189,7 +2228,7 @@ def _hs2_process(self, sensor, logs_dir): # noqa: C901, PLR0912, PLR0915 sensor, orig_nc, ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + out_fn = f"{self.auv_name}_{self.mission}_cal.nc" self.combined_nc[f"{sensor}_depth"].attrs = { "long_name": "Depth", "units": "m", @@ -2299,7 +2338,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2338,8 +2377,10 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 self.combined_nc[f"{sensor}_temperature"] = temperature self.logger.debug("Calling _calibrated_sal_from_cond_frequency()") + # Create a simple namespace for backward compatibility with helper functions + args_ns = type("obj", (object,), {"plot": self.plot})() cal_conductivity, cal_salinity = _calibrated_sal_from_cond_frequency( - self.args, + args_ns, self.combined_nc, self.logger, cf, @@ -2466,12 +2507,12 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 "", ) except KeyError: - self.logger.debug("No dissolvedO2 data in %s", self.args.mission) + self.logger.debug("No dissolvedO2 data in %s", self.mission) except ValueError as e: cfg_file = Path( MISSIONLOGS, - "".join(self.args.mission.split(".")[:2]), - self.args.mission, + "".join(self.mission.split(".")[:2]), + self.mission, self.sinfo["ctd"]["cal_filename"], ) self.logger.exception("Likely missing a calibration coefficient in %s", cfg_file) @@ -2504,7 +2545,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 "port", ) except KeyError: - self.logger.debug("No dissolvedO2_port data in %s", self.args.mission) + self.logger.debug("No dissolvedO2_port data in %s", self.mission) self.logger.debug("Collecting dissolvedO2_port") try: dissolvedO2_stbd = xr.DataArray( @@ -2533,7 +2574,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 "stbd", ) except KeyError: - self.logger.debug("No dissolvedO2_port data in %s", self.args.mission) + self.logger.debug("No dissolvedO2_port data in %s", self.mission) # === flow variables === # A lot of 0.0 values in Dorado missions until about 2020.282.01 @@ -2552,7 +2593,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 } self.combined_nc[f"{sensor}_flow1"] = flow1 except KeyError: - self.logger.debug("No flow1 data in %s", self.args.mission) + self.logger.debug("No flow1 data in %s", self.mission) self.logger.debug("Collecting flow2") try: flow2 = xr.DataArray( @@ -2568,7 +2609,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 } self.combined_nc[f"{sensor}_flow2"] = flow2 except KeyError: - self.logger.debug("No flow2 data in %s", self.args.mission) + self.logger.debug("No flow2 data in %s", self.mission) # === beam_transmittance variable from seabird25p on i2map vehicle === try: @@ -2594,7 +2635,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 except KeyError: self.logger.debug( "No transmissometer data in %s/%s.nc", - self.args.mission, + self.mission, sensor, ) @@ -2602,7 +2643,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 sensor, orig_nc, ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + out_fn = f"{self.auv_name}_{self.mission}_cal.nc" self.combined_nc[f"{sensor}_depth"].attrs = { "long_name": "Depth", "units": "m", @@ -2629,13 +2670,13 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 self.combined_nc[f"{sensor}_par"] = par except KeyError: - self.logger.debug("No par data in %s/%s.nc", self.args.mission, sensor) + self.logger.debug("No par data in %s/%s.nc", self.mission, sensor) self.combined_nc[f"{sensor}_depth"] = self._geometric_depth_correction( sensor, orig_nc, ) - out_fn = f"{self.args.auv_name}_{self.args.mission}_cal.nc" + out_fn = f"{self.auv_name}_{self.mission}_cal.nc" self.combined_nc[f"{sensor}_depth"].attrs = { "long_name": "Depth", "units": "m", @@ -2648,7 +2689,7 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 # === ad hoc Range checking === self.logger.info( - "Performing range checking of %s in %s/%s.nc", vars_to_qc, self.args.mission, sensor + "Performing range checking of %s in %s/%s.nc", vars_to_qc, self.mission, sensor ) self._range_qc_combined_nc( instrument=sensor, @@ -2656,9 +2697,9 @@ def _ctd_process(self, logs_dir, sensor, cf): # noqa: C901, PLR0912, PLR0915 ranges={f"{sensor}_salinity": Range(30, 40)}, set_to_nan=True, ) - if self.args.mission == "2010.284.00": + if self.mission == "2010.284.00": self.logger.info( - "Removing points outside of time range for %s/%s.nc", self.args.mission, sensor + "Removing points outside of time range for %s/%s.nc", self.mission, sensor ) self._range_qc_combined_nc( instrument=sensor, @@ -2682,7 +2723,7 @@ def _tailcone_process(self, sensor): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2727,7 +2768,7 @@ def _ecopuck_process(self, sensor, cf): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2842,7 +2883,7 @@ def _biolume_process(self, sensor): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2920,9 +2961,9 @@ def _biolume_process(self, sensor): "coordinates": f"{sensor}_{TIME60HZ} {sensor}_depth60hz", "comment": f"raw values from {source} {lag_info}", } - if self.args.mission == "2010.284.00": + if self.mission == "2010.284.00": self.logger.info( - "Removing points outside of time range for %s/biolume.nc", self.args.mission + "Removing points outside of time range for %s/biolume.nc", self.mission ) for time_axis in (TIME, TIME60HZ): self._range_qc_combined_nc( @@ -2953,7 +2994,7 @@ def _lopc_process(self, sensor): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -2965,7 +3006,7 @@ def _lopc_process(self, sensor): if "time" not in orig_nc.coords: error_message = ( f"{sensor} has no time coordinate - likely an incomplete lopc.nc file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) @@ -3043,7 +3084,7 @@ def _isus_process(self, sensor): except AttributeError: error_message = ( f"{sensor} has no orig_data - likely a missing or zero-sized .log file" - f" in {Path(MISSIONLOGS, self.args.mission)}" + f" in {Path(MISSIONLOGS, self.mission)}" ) raise EOFError(error_message) from None @@ -3157,7 +3198,7 @@ def _geometric_depth_correction(self, sensor, orig_nc): d_beg_time_diff.astype("timedelta64[s]"), d_end_time_diff.astype("timedelta64[s]"), ) - if self.args.mission in ( + if self.mission in ( "2008.289.03", "2010.259.01", "2010.259.02", @@ -3168,7 +3209,7 @@ def _geometric_depth_correction(self, sensor, orig_nc): self.logger.info( "%s: Special QC for mission %s: Setting corrected_depth to NaN for times after %s", sensor, - self.args.mission, + self.mission, self.combined_nc["depth_time"][-1].to_numpy(), ) corrected_depth[ @@ -3176,7 +3217,7 @@ def _geometric_depth_correction(self, sensor, orig_nc): orig_nc.get_index("time") > self.combined_nc["depth_time"].to_numpy()[-1], ) ] = np.nan - if self.args.plot: + if self.plot: plt.figure(figsize=(18, 6)) plt.plot( orig_nc["time"].to_numpy(), @@ -3192,7 +3233,7 @@ def _geometric_depth_correction(self, sensor, orig_nc): plt.ylabel("Depth (m) & Pitch (deg)") plt.legend(("Original depth", "Pitch corrected depth", "Pitch")) plt.title( - f"Original and pitch corrected depth for {self.args.auv_name} {self.args.mission}", + f"Original and pitch corrected depth for {self.auv_name} {self.mission}", ) plt.show() @@ -3231,11 +3272,10 @@ def _process(self, sensor, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 elif hasattr(getattr(self, sensor), "orig_data"): self.logger.warning("No method (yet) to process %s", sensor) - def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + def write_netcdf(self, netcdfs_dir: Path) -> None: + """Write calibrated netCDF file using instance attributes.""" self.combined_nc.attrs = self.global_metadata() - out_fn = Path(netcdfs_dir, f"{vehicle}_{name}_cal.nc") + out_fn = Path(netcdfs_dir, f"{self.auv_name}_{self.mission}_cal.nc") self.logger.info("Writing calibrated instrument data to %s", out_fn) if Path(out_fn).exists(): Path(out_fn).unlink() @@ -3245,12 +3285,13 @@ def write_netcdf(self, netcdfs_dir, vehicle: str = "", name: str = "") -> None: ", ".join(sorted(self.combined_nc.variables)), ) - def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = True) -> None: # noqa: FBT001, FBT002 - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - logs_dir = Path(self.args.base_path, vehicle, MISSIONLOGS, name) - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) - start_datetime = datetime.strptime(".".join(name.split(".")[:2]), "%Y.%j").astimezone( + def process_logs(self, process_gps: bool = True) -> Path: # noqa: FBT001, FBT002 + """Process logs using instance attributes.""" + logs_dir = Path(self.base_path, self.auv_name, MISSIONLOGS, self.mission) + netcdfs_dir = Path(self.base_path, self.auv_name, MISSIONNETCDFS, self.mission) + start_datetime = datetime.strptime( + ".".join(self.mission.split(".")[:2]), "%Y.%j" + ).astimezone( UTC, ) self._define_sensor_info(start_datetime) @@ -3261,12 +3302,12 @@ def process_logs(self, vehicle: str = "", name: str = "", process_gps: bool = Tr if not process_gps and sensor == "gps": continue # to skip gps processing in conftest.py fixture getattr(self, sensor).cal_align_data = xr.Dataset() - self.logger.debug("Processing %s %s %s", vehicle, name, sensor) + self.logger.debug("Processing %s %s %s", self.auv_name, self.mission, sensor) try: self._process(sensor, logs_dir, netcdfs_dir) except EOFError as e: - short_name = vehicle.lower() - if vehicle == "Dorado389": + short_name = self.auv_name.lower() + if self.auv_name == "Dorado389": # For supporting pytest & conftest.py fixture short_name = "dorado" if sensor in EXPECTED_SENSORS[short_name]: @@ -3303,7 +3344,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/combine.py b/src/data/combine.py index 9141192e..cc63b555 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -91,6 +91,27 @@ class Combine_NetCDF: _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) variable_time_coord_mapping: dict = {} + def __init__( + self, + log_file: str, + verbose: int = 0, + plot: str = None, + commandline: str = "", + ) -> None: + """Initialize Combine_NetCDF with explicit parameters. + + Args: + log_file: LRAUV log file path for processing + verbose: Verbosity level (0=WARN, 1=INFO, 2=DEBUG) + plot: Optional plot specification + commandline: Command line string for metadata + """ + self.log_file = log_file + self.verbose = verbose + self.plot = plot + self.commandline = commandline + self.logger.setLevel(self._log_levels[verbose]) + def global_metadata(self): """Use instance variables to return a dictionary of metadata specific for the data that are written @@ -121,7 +142,7 @@ def global_metadata(self): metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." metadata["history"] = f"Created by {self.commandline} on {iso_now}" metadata["variable_time_coord_mapping"] = json.dumps(self.variable_time_coord_mapping) - log_file = self.args.log_file + log_file = self.log_file metadata["title"] = ( f"Combined LRAUV data from {log_file} - relevant variables extracted for STOQS" ) @@ -587,13 +608,12 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) - def _initial_coordinate_qc(self, log_file: str = "") -> None: + def _initial_coordinate_qc(self) -> None: """Perform initial QC on core coordinate variables for specific log files.""" - log_file = log_file or self.args.log_file - if log_file in ( + if self.log_file in ( "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", ): - self.logger.info("Performing initial coordinate QC for %s", self.args.log_file) + self.logger.info("Performing initial coordinate QC for %s", self.log_file) self._range_qc_combined_nc( instrument="universals", variables=[ @@ -619,10 +639,9 @@ def _initial_coordinate_qc(self, log_file: str = "") -> None: set_to_nan=False, ) - def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = "") -> None: + def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: """Add nudged longitude and latitude variables to the combined dataset.""" - log_file = log_file or self.args.log_file - self._initial_coordinate_qc(log_file=log_file) + self._initial_coordinate_qc() try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( nav_longitude=self.combined_nc["universals_longitude"], @@ -632,9 +651,9 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = logger=self.logger, auv_name="", mission="", - log_file=log_file, + log_file=self.log_file, max_sec_diff_at_end=max_sec_diff_at_end, - create_plots=self.args.plot, + create_plots=self.plot, ) except ValueError as e: self.logger.error("Nudging positions failed: %s", e) # noqa: TRY400 @@ -682,11 +701,10 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10, log_file: str = ), } - def combine_groups(self, log_file: str = None) -> None: + def combine_groups(self) -> None: """Combine group files into a single NetCDF dataset with consolidated time coordinates.""" - log_file = self.args.log_file or log_file - src_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - group_files = sorted(src_dir.glob(f"{Path(log_file).stem}_{GROUP}_*.nc")) + src_dir = Path(BASE_LRAUV_PATH, Path(self.log_file).parent) + group_files = sorted(src_dir.glob(f"{Path(self.log_file).stem}_{GROUP}_*.nc")) self.summary_fields = set() self.combined_nc = xr.Dataset() @@ -711,22 +729,21 @@ def combine_groups(self, log_file: str = None) -> None: self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"]) # Write intermediate file for cf_xarray decoding - intermediate_file = self._intermediate_write_netcdf(log_file=log_file) + intermediate_file = self._intermediate_write_netcdf() with xr.open_dataset(intermediate_file, decode_cf=True) as ds: self.combined_nc = ds.load() # Add nudged coordinates - self._add_nudged_coordinates(log_file=log_file) + self._add_nudged_coordinates() # Clean up intermediate file Path(intermediate_file).unlink() - def _intermediate_write_netcdf(self, log_file: str = None) -> None: + def _intermediate_write_netcdf(self) -> None: """Write out an intermediate combined netCDF file so that data can be read using decode_cf=True for nudge_positions() to work with cf accessors.""" - log_file = self.args.log_file or log_file - netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined_intermediate.nc") + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(self.log_file).parent) + out_fn = Path(netcdfs_dir, f"{Path(self.log_file).stem}_combined_intermediate.nc") self.combined_nc.attrs = self.global_metadata() self.logger.info("Writing intermediate combined group data to %s", out_fn) @@ -743,10 +760,10 @@ def _intermediate_write_netcdf(self, log_file: str = None) -> None: ) return out_fn - def write_netcdf(self, log_file: str = None) -> None: - log_file = self.args.log_file or log_file - netcdfs_dir = Path(BASE_LRAUV_PATH, Path(log_file).parent) - out_fn = Path(netcdfs_dir, f"{Path(log_file).stem}_combined.nc") + def write_netcdf(self) -> None: + """Write combined netCDF file using instance attributes.""" + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(self.log_file).parent) + out_fn = Path(netcdfs_dir, f"{Path(self.log_file).stem}_combined.nc") self.combined_nc.attrs = self.global_metadata() self.logger.info("Writing combined group data to %s", out_fn) @@ -787,7 +804,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/conftest.py b/src/data/conftest.py index 054dba6a..02c47cb4 100644 --- a/src/data/conftest.py +++ b/src/data/conftest.py @@ -1,5 +1,4 @@ # noqa: INP001 -import logging import os import sys from argparse import Namespace @@ -95,16 +94,23 @@ def create_test_namespace(vehicle_overrides=None, processing_overrides=None): def mission_data(): if not Path(TEST_VEHICLE_DIR).exists(): pytest.fail(f"\n\n{bootstrap_mission}\n") - """Load a short recent mission to have some real data to work with""" - cal_netcdf = Calibrate_NetCDF() - ns = Namespace() + """Load a short mission to have some real data to work with""" # The BASE_PATH environment variable can be set in ci.yml for running in GitHub Actions - ns.base_path = os.getenv("BASE_PATH", BASE_PATH) - ns.auv_name = TEST_VEHICLE - ns.mission = TEST_MISSION - ns.plot = None - cal_netcdf.args = ns - cal_netcdf.logger.setLevel(logging.DEBUG) + base_path = os.getenv("BASE_PATH", BASE_PATH) + + cal_netcdf = Calibrate_NetCDF( + auv_name=TEST_VEHICLE, + mission=TEST_MISSION, + base_path=base_path, + calibration_dir=TEST_CALIBRATION_DIR, + plot=None, + verbose=2, # DEBUG level + commandline="test", + local=True, + noinput=True, + clobber=False, + noreprocess=False, + ) cal_netcdf.process_logs(process_gps=False) return cal_netcdf diff --git a/src/data/create_products.py b/src/data/create_products.py index 54dbdece..fdf0806c 100755 --- a/src/data/create_products.py +++ b/src/data/create_products.py @@ -43,6 +43,35 @@ class CreateProducts: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + def __init__( # noqa: PLR0913 + self, + auv_name: str = None, + mission: str = None, + base_path: str = str(BASE_PATH), + start_esecs: float = None, + local: bool = False, # noqa: FBT001, FBT002 + verbose: int = 0, + commandline: str = "", + ): + """Initialize CreateProducts with explicit parameters. + + Args: + auv_name: Name of the AUV vehicle + mission: Mission identifier + base_path: Base path for output files + start_esecs: Start epoch seconds for processing + local: Local processing flag + verbose: Verbosity level (0-2) + commandline: Command line string for tracking + """ + self.auv_name = auv_name + self.mission = mission + self.base_path = base_path + self.start_esecs = start_esecs + self.local = local + self.verbose = verbose + self.commandline = commandline + # Column name format required by ODV - will be tab delimited ODV_COLUMN_NAMES = [ # noqa: RUF012 "Cruise", @@ -95,18 +124,18 @@ class CreateProducts: def _open_ds(self): local_nc = Path( BASE_PATH, - self.args.auv_name, + self.auv_name, MISSIONNETCDFS, - self.args.mission, - f"{self.args.auv_name}_{self.args.mission}_{FREQ}.nc", + self.mission, + f"{self.auv_name}_{self.mission}_{FREQ}.nc", ) # Requires mission to have been processed and archived to AUVCTD dap_url = os.path.join( # noqa: PTH118 AUVCTD_OPENDAP_BASE, "surveys", - self.args.mission.split(".")[0], + self.mission.split(".")[0], "netcdf", - f"{self.args.auv_name}_{self.args.mission}_{FREQ}.nc", + f"{self.auv_name}_{self.mission}_{FREQ}.nc", ) try: self.ds = xr.open_dataset(dap_url) @@ -354,13 +383,13 @@ def plot_2column(self) -> str: col = 1 # Save plot to file - images_dir = Path(BASE_PATH, self.args.auv_name, MISSIONIMAGES) + images_dir = Path(BASE_PATH, self.auv_name, MISSIONIMAGES) Path(images_dir).mkdir(parents=True, exist_ok=True) plt.savefig( Path( images_dir, - f"{self.args.auv_name}_{self.args.mission}_{FREQ}_2column.png", + f"{self.auv_name}_{self.mission}_{FREQ}_2column.png", ), ) @@ -390,29 +419,29 @@ def gulper_odv(self, sec_bnds: int = 1) -> str: # noqa: C901, PLR0912, PLR0915 gulper = Gulper() gulper.args = argparse.Namespace() - gulper.args.base_path = self.args.base_path - gulper.args.auv_name = self.args.auv_name - gulper.args.mission = self.args.mission - gulper.args.local = self.args.local - gulper.args.verbose = self.args.verbose - gulper.args.start_esecs = self.args.start_esecs - gulper.logger.setLevel(self._log_levels[self.args.verbose]) + gulper.args.base_path = self.base_path + gulper.args.auv_name = self.auv_name + gulper.args.mission = self.mission + gulper.args.local = self.local + gulper.args.verbose = self.verbose + gulper.args.start_esecs = self.start_esecs + gulper.logger.setLevel(self._log_levels[self.verbose]) gulper.logger.addHandler(self._handler) gulper_times = gulper.parse_gulpers() if not gulper_times: - self.logger.info("No gulper times found for %s", self.args.mission) + self.logger.info("No gulper times found for %s", self.mission) return odv_dir = Path( BASE_PATH, - self.args.auv_name, + self.auv_name, MISSIONODVS, - self.args.mission, + self.mission, ) Path(odv_dir).mkdir(parents=True, exist_ok=True) gulper_odv_filename = Path( odv_dir, - f"{self.args.auv_name}_{self.args.mission}_{FREQ}_Gulper.txt", + f"{self.auv_name}_{self.mission}_{FREQ}_Gulper.txt", ) self._open_ds() @@ -436,7 +465,7 @@ def gulper_odv(self, sec_bnds: int = 1) -> str: # noqa: C901, PLR0912, PLR0915 ) for count, name in enumerate(odv_column_names): if name == "Cruise": - f.write(f"{self.args.auv_name}_{self.args.mission}_{FREQ}") + f.write(f"{self.auv_name}_{self.mission}_{FREQ}") elif name == "Station": f.write(f"{int(gulper_data['profile_number'].to_numpy().mean()):d}") elif name == "Type": @@ -543,7 +572,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/logs2netcdfs.py b/src/data/logs2netcdfs.py index c931bcb8..ad5bac69 100755 --- a/src/data/logs2netcdfs.py +++ b/src/data/logs2netcdfs.py @@ -72,6 +72,68 @@ class AUV_NetCDF: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + def __init__( # noqa: PLR0913 + self, + auv_name: str = None, + mission: str = None, + vehicle_dir: str = None, + base_path: str = str(BASE_PATH), + start: str = None, + end: str = None, + preview: bool = False, # noqa: FBT001, FBT002 + verbose: int = 0, + title: str = None, + summary: str = None, + add_seconds: float = None, + local: bool = False, # noqa: FBT001, FBT002 + noinput: bool = False, # noqa: FBT001, FBT002 + clobber: bool = False, # noqa: FBT001, FBT002 + noreprocess: bool = False, # noqa: FBT001, FBT002 + use_portal: bool = False, # noqa: FBT001, FBT002 + portal: str = None, + commandline: str = "", + ): + """Initialize AUV_NetCDF with explicit parameters. + + Args: + auv_name: Name of the AUV vehicle + mission: Mission identifier + vehicle_dir: Directory containing vehicle mission logs + base_path: Base path for output files + start: Start datetime for filtering (LRAUV) + end: End datetime for filtering (LRAUV) + preview: Preview mode flag + verbose: Verbosity level (0-2) + title: Custom title for netCDF metadata + summary: Custom summary for netCDF metadata + add_seconds: Seconds to add for time correction + local: Process local mission without standard directory structure + noinput: Don't prompt for user input + clobber: Overwrite existing files + noreprocess: Don't reprocess existing files + use_portal: Use portal for data download + portal: Portal base URL + commandline: Command line string for tracking + """ + self.auv_name = auv_name + self.mission = mission + self.vehicle_dir = vehicle_dir + self.base_path = base_path + self.start = start + self.end = end + self.preview = preview + self.verbose = verbose + self.title = title + self.summary = summary + self.add_seconds = add_seconds + self.local = local + self.noinput = noinput + self.clobber = clobber + self.noreprocess = noreprocess + self.use_portal = use_portal + self.portal = portal + self.commandline = commandline + def read(self, file: Path) -> list[log_record]: """Reads and parses an AUV log and returns a list of `log_records`""" byte_offset = 0 @@ -351,8 +413,8 @@ def _unique_vehicle_names(self): return {d["vehicle"] for d in resp.json()} def _deployments_between(self): - start = f"{self.args.start}T000000Z" - end = f"{self.args.end}T235959Z" + start = f"{self.start}T000000Z" + end = f"{self.end}T235959Z" url = f"{self.deployments_url}?from={start}&to={end}" self.logger.debug("Getting missions from %s", url) with requests.get(url, timeout=TIMEOUT) as resp: @@ -363,15 +425,15 @@ def _deployments_between(self): error_message = f"No missions from {url}" raise LookupError(error_message) for item in resp.json(): - if self.args.preview: - self.logger.setLevel(self._log_levels[max(1, self.args.verbose)]) + if self.preview: + self.logger.setLevel(self._log_levels[max(1, self.verbose)]) self.logger.info("%s %s", item["vehicle"], item["name"]) else: - if self.args.auv_name and item["vehicle"].upper() != self.args.auv_name.upper(): + if self.auv_name and item["vehicle"].upper() != self.auv_name.upper(): self.logger.debug( "%s != %s", item["vehicle"], - self.args.auv_name, + self.auv_name, ) continue try: @@ -392,8 +454,8 @@ def _deployments_between(self): self.download_process_logs(item["vehicle"], item["name"]) def _files_from_mission(self, name=None, vehicle=None): - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + name = name or self.mission + vehicle = vehicle or self.auv_name files_url = f"{self.portal_base}/files/list/{name}/{vehicle}" self.logger.debug("Getting files list from %s", files_url) with requests.get(files_url, timeout=TIMEOUT) as resp: @@ -423,7 +485,7 @@ async def _get_file(self, download_url, local_filename, session): async for chunk in resp.content.iter_chunked(1024): await handle.write(chunk) handle.write(chunk) - if self.args.verbose > 1: + if self.verbose > 1: print( # noqa: T201 f"{Path(local_filename).name}(done) ", end="", @@ -434,8 +496,8 @@ async def _get_file(self, download_url, local_filename, session): self.logger.exception() async def _download_files(self, logs_dir, name=None, vehicle=None): - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name + name = name or self.mission + vehicle = vehicle or self.auv_name tasks = [] async with ClientSession(timeout=TIMEOUT) as session: for ffm in self._files_from_mission(name, vehicle): @@ -583,9 +645,9 @@ def correct_times(self, log_data, add_seconds: int = 0): def write_variables(self, log_data, netcdf_filename): log_data = self._correct_dup_short_names(log_data) - if self.args.mission == "2025.316.02" and self.args.add_seconds: + if self.mission == "2025.316.02" and self.add_seconds: # So far only this mission is known to suffer from GPS Week Rollover bug - log_data = self.correct_times(log_data, self.args.add_seconds) + log_data = self.correct_times(log_data, self.add_seconds) self.nc_file.createDimension(TIME, len(log_data[0].data)) for variable in log_data: self.logger.debug( @@ -725,19 +787,19 @@ def _process_log_file(self, log_filename, netcdf_filename, src_dir=None): # Add the global metadata, overriding with command line options provided self.add_global_metadata() - vehicle = self.args.auv_name + vehicle = self.auv_name self.nc_file.title = f"Original AUV {vehicle} data converted from {log_filename}" - if hasattr(self.args, "title") and self.args.title: - self.nc_file.title = self.args.title + if self.title: + self.nc_file.title = self.title if src_dir: # The source attribute might make more sense for the location of # the source data, but the summary field is shown in STOQS metadata self.nc_file.summary = SUMMARY_SOURCE.format(src_dir) - if hasattr(self.args, "summary") and self.args.summary: - self.nc_file.summary = self.args.summary - if self.args.add_seconds: + if self.summary: + self.nc_file.summary = self.summary + if self.add_seconds: self.nc_file.summary += ( - f". Corrected timeTag variables by adding {self.args.add_seconds} seconds. " + f". Corrected timeTag variables by adding {self.add_seconds} seconds. " ) monotonic = monotonic_increasing_time_indices(self.nc_file["time"][:]) if (~monotonic).any(): @@ -754,15 +816,15 @@ def get_mission_dir(self, mission: str) -> str: """Return the mission directory. This method is nearly identical to the one in the Processor class, but it is used here to be explicit and to avoid the need to import the Processor class.""" - if not Path(self.args.vehicle_dir).exists(): - self.logger.error("%s does not exist.", self.args.vehicle_dir) + if not Path(self.vehicle_dir).exists(): + self.logger.error("%s does not exist.", self.vehicle_dir) self.logger.info("Is %s mounted?", self.mount_dir) sys.exit(1) - if self.args.auv_name.lower() == "dorado": + if self.auv_name.lower() == "dorado": year = mission.split(".")[0] yearyd = "".join(mission.split(".")[:2]) - path = Path(self.args.vehicle_dir, year, yearyd, mission) - elif self.args.auv_name.lower() == "i2map": + path = Path(self.vehicle_dir, year, yearyd, mission) + elif self.auv_name.lower() == "i2map": year = int(mission.split(".")[0]) # Could construct the YYYY/MM/YYYYMMDD path on M3/Master # but use the mission_list() method to find the mission dir instead @@ -770,12 +832,12 @@ def get_mission_dir(self, mission: str) -> str: if mission in missions: path = missions[mission] else: - self.logger.error("Cannot find %s in %s", mission, self.args.vehicle_dir) - error_message = f"Cannot find {mission} in {self.args.vehicle_dir}" + self.logger.error("Cannot find %s in %s", mission, self.vehicle_dir) + error_message = f"Cannot find {mission} in {self.vehicle_dir}" raise FileNotFoundError(error_message) - elif self.args.auv_name == "Dorado389": + elif self.auv_name == "Dorado389": # The Dorado389 vehicle is a special case used for testing locally and in CI - path = self.args.vehicle_dir + path = self.vehicle_dir if not Path(path).exists(): self.logger.error("%s does not exist.", path) error_message = f"{path} does not exist." @@ -788,33 +850,33 @@ def download_process_logs( # noqa: C901, PLR0912, PLR0915 name: str = "", src_dir: Path = Path(), ) -> None: - name = name or self.args.mission - vehicle = vehicle or self.args.auv_name - logs_dir = Path(self.args.base_path, vehicle, MISSIONLOGS, name) + name = name or self.mission + vehicle = vehicle or self.auv_name + logs_dir = Path(self.base_path, vehicle, MISSIONLOGS, name) if src_dir: self.logger.info("src_dir = %s", src_dir) - if not self.args.local: + if not self.local: # As of 20 July 2023 this returns 404, which is distracting # self.logger.debug( # f"Unique vehicle names: {self._unique_vehicle_names()} seconds" # ) yes_no = "Y" if Path(logs_dir, "vehicle.cfg").exists(): - if self.args.noinput: - if self.args.clobber: + if self.noinput: + if self.clobber: self.logger.info("Clobbering existing %s files", logs_dir) else: self.logger.info("%s exists", logs_dir) yes_no = "N" - if self.args.noreprocess: + if self.noreprocess: self.logger.info("Not reprocessing %s", logs_dir) return else: yes_no = input(f"Directory {logs_dir} exists. Re-download? [Y/n]: ") or "Y" if yes_no.upper().startswith("Y"): - if self.args.use_portal: + if self.use_portal: self._portal_download(logs_dir, name=name, vehicle=vehicle) elif src_dir: safe_src_dir = Path(src_dir).resolve() @@ -834,7 +896,7 @@ def download_process_logs( # noqa: C901, PLR0912, PLR0915 self._portal_download(logs_dir, name=name, vehicle=vehicle) self.logger.info("Processing mission: %s %s", vehicle, name) - netcdfs_dir = Path(self.args.base_path, vehicle, MISSIONNETCDFS, name) + netcdfs_dir = Path(self.base_path, vehicle, MISSIONNETCDFS, name) Path(netcdfs_dir).mkdir(parents=True, exist_ok=True) p_start = time.time() for log in LOG_FILES: @@ -864,7 +926,7 @@ def download_process_logs( # noqa: C901, PLR0912, PLR0915 self.logger.info("Time to process: %.2f seconds", time.time() - p_start) def update(self): - self.logger.setLevel(self._log_levels[max(1, self.args.verbose)]) + self.logger.setLevel(self._log_levels[max(1, self.verbose)]) url = "http://portal.shore.mbari.org:8080/auvdata/v1/deployments/update" auv_netcdf.logger.info("Sending an 'update' request: %s", url) resp = requests.post(url, timeout=TIMEOUT) @@ -880,9 +942,9 @@ def update(self): def set_portal(self) -> None: self.portal_base = PORTAL_BASE self.deployments_url = Path(self.portal_base, "deployments") - if hasattr(self.args, "portal") and self.args.portal: - self.portal_base = self.args.portal - self.deployments_url = Path(self.args.portal, "deployments") + if self.portal: + self.portal_base = self.portal + self.deployments_url = Path(self.portal, "deployments") def process_command_line(self): """Process command line arguments using shared parser infrastructure.""" @@ -941,10 +1003,36 @@ def process_command_line(self): help="Directory for the vehicle's mission logs, e.g.: /Volumes/AUVCTD/missionlogs", ) - self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + args = parser.parse_args() + + # Reinitialize with parsed arguments + self.__init__( + auv_name=args.auv_name, + mission=args.mission, + vehicle_dir=args.vehicle_dir, + base_path=args.base_path, + start=args.start, + end=args.end, + preview=args.preview, + verbose=args.verbose, + title=args.title, + summary=args.summary, + add_seconds=args.add_seconds, + local=args.local, + noinput=args.noinput, + clobber=args.clobber, + noreprocess=args.noreprocess, + use_portal=args.use_portal, + portal=args.portal, + commandline=" ".join(sys.argv), + ) + + # Keep args for backward compatibility with any code that expects it + self.args = args + self.update_attr = args.update # Special case for update flag + + self.logger.setLevel(self._log_levels[self.verbose]) self.set_portal() - self.commandline = " ".join(sys.argv) if __name__ == "__main__": @@ -952,18 +1040,18 @@ def process_command_line(self): auv_netcdf.process_command_line() p_start = time.time() - if auv_netcdf.args.update: + if auv_netcdf.update_attr: auv_netcdf.update() - elif auv_netcdf.args.auv_name and auv_netcdf.args.mission: - if auv_netcdf.args.vehicle_dir: - path = auv_netcdf.get_mission_dir(auv_netcdf.args.mission) + elif auv_netcdf.auv_name and auv_netcdf.mission: + if auv_netcdf.vehicle_dir: + path = auv_netcdf.get_mission_dir(auv_netcdf.mission) auv_netcdf.download_process_logs(src_dir=path) else: raise argparse.ArgumentError( None, "Must provide --vehicle_dir with --auv_name & --mission", ) - elif auv_netcdf.args.start and auv_netcdf.args.end: + elif auv_netcdf.start and auv_netcdf.end: auv_netcdf._deployments_between() else: raise argparse.ArgumentError( diff --git a/src/data/lopcMEP.py b/src/data/lopcMEP.py index 0629acc4..b312d924 100755 --- a/src/data/lopcMEP.py +++ b/src/data/lopcMEP.py @@ -1,7 +1,7 @@ #!/usr/bin/env python __author__ = "Mike McCann" -__version__ = "$Revision: 1.8 $".split()[1] -__date__ = "$Date: 2010/08/30 23:24:40 $".split()[1] +__version__ = ["$Revision:", "1.8", "$"][1] +__date__ = ["$Date:", "2010/08/30", "23:24:40", "$"][1] __copyright__ = "2010" __license__ = "GPL v3" __contact__ = "mccann at mbari.org" diff --git a/src/data/lopcToNetCDF.py b/src/data/lopcToNetCDF.py index 5e4bb948..cab8903d 100755 --- a/src/data/lopcToNetCDF.py +++ b/src/data/lopcToNetCDF.py @@ -1,7 +1,7 @@ #!/usr/bin/env python __author__ = "Mike McCann" -__version__ = "$Revision: 1.43 $".split()[1] -__date__ = "$Date: 2020/11/23 21:40:04 $".split()[1] +__version__ = ["$Revision:", "1.43", "$"][1] +__date__ = ["$Date:", "2020/11/23", "21:40:04", "$"][1] __copyright__ = "2009" __license__ = "GPL v3" __contact__ = "mccann at mbari.org" diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 85438e4a..01e49373 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -168,6 +168,29 @@ class Extract: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) + def __init__( # noqa: PLR0913 + self, + log_file: str = None, + plot_time: bool = False, # noqa: FBT001, FBT002 + filter_monotonic_time: bool = True, # noqa: FBT001, FBT002 + verbose: int = 0, + commandline: str = "", + ) -> None: + """Initialize Extract with explicit parameters. + + Args: + log_file: Log file path for processing + plot_time: Enable time plotting + filter_monotonic_time: Filter out non-monotonic time values + verbose: Verbosity level (0-2) + commandline: Command line string for tracking + """ + self.log_file = log_file + self.plot_time = plot_time + self.filter_monotonic_time = filter_monotonic_time + self.verbose = verbose + self.commandline = commandline + def show_variable_mapping(self): """Show the variable mapping.""" for group, parms in sorted(SCIENG_PARMS.items()): @@ -308,7 +331,7 @@ def _get_time_filters_for_variables( dict: Map of time_coord_name -> {"indices": list[int], "filtered": bool} """ # Check if time filtering is enabled - if not getattr(self.args, "filter_monotonic_time", True): + if not self.filter_monotonic_time: return {} self.logger.info("========================= Group %s =========================", group_name) @@ -462,10 +485,10 @@ def _find_time_coordinates( def _parse_plot_time_argument(self) -> tuple[str | None, str | None]: """Parse the --plot_time argument and return (group_name, time_coord_name).""" - if not getattr(self.args, "plot_time", None): + if not self.plot_time: return None, None - plot_time = self.args.plot_time + plot_time = self.plot_time if not plot_time.startswith("/"): msg = "Invalid plot_time format, must be //" raise ValueError(msg) @@ -930,7 +953,7 @@ def _create_netcdf_file( # noqa: PLR0913 self._copy_global_attributes(src_group, dst_dataset) # Add standard global attributes - log_file = self.args.log_file + log_file = self.log_file for attr_name, attr_value in self.global_metadata(log_file, group_name).items(): dst_dataset.setncattr(attr_name, attr_value) @@ -1013,7 +1036,7 @@ def global_metadata(self, log_file: str, group_name: str): metadata["license"] = metadata["distribution_statement"] metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." metadata["history"] = f"Created by {self.commandline} on {iso_now}" - log_file = self.args.log_file + log_file = self.log_file metadata["title"] = f"Extracted LRAUV data from {log_file}, Group: {group_name}" metadata["source"] = ( f"MBARI LRAUV data extracted from {log_file}" @@ -1101,7 +1124,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/process.py b/src/data/process.py index 369ca597..8977425b 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -437,12 +437,22 @@ def get_mission_dir(self, mission: str) -> str: def download_process(self, mission: str, src_dir: str) -> None: self.logger.info("Download and processing steps for %s", mission) - auv_netcdf = AUV_NetCDF() - auv_netcdf.args = self._create_child_namespace(auv_name=self.auv_name, mission=mission) + auv_netcdf = AUV_NetCDF( + auv_name=self.auv_name, + mission=mission, + base_path=str(self.config["base_path"]), + local=self.config["local"], + noinput=self.config["noinput"], + clobber=self.config["clobber"], + noreprocess=self.config["noreprocess"], + use_portal=self.config["use_portal"], + add_seconds=self.config["add_seconds"], + verbose=self.config["verbose"], + commandline=self.commandline, + ) auv_netcdf.set_portal() auv_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) auv_netcdf.logger.addHandler(self.log_handler) - auv_netcdf.commandline = self.commandline auv_netcdf.download_process_logs(src_dir=src_dir) auv_netcdf.logger.removeHandler(self.log_handler) @@ -489,14 +499,20 @@ def download_process(self, mission: str, src_dir: str) -> None: def calibrate(self, mission: str) -> None: self.logger.info("Calibration steps for %s", mission) - cal_netcdf = Calibrate_NetCDF() - cal_netcdf.args = self._create_child_namespace( - auv_name=self.auv_name, mission=mission, plot=None + cal_netcdf = Calibrate_NetCDF( + auv_name=self.auv_name, + mission=mission, + base_path=self.config["base_path"], + calibration_dir=self.calibration_dir, + plot=None, + verbose=self.config["verbose"], + commandline=self.commandline, + local=self.config["local"], + noinput=self.config["noinput"], + clobber=self.config["clobber"], + noreprocess=self.config["noreprocess"], ) - cal_netcdf.calibration_dir = self.calibration_dir - cal_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) cal_netcdf.logger.addHandler(self.log_handler) - cal_netcdf.commandline = self.commandline try: netcdf_dir = cal_netcdf.process_logs() cal_netcdf.write_netcdf(netcdf_dir) @@ -505,21 +521,24 @@ def calibrate(self, mission: str) -> None: cal_netcdf.logger.removeHandler(self.log_handler) def align(self, mission: str = "", log_file: str = "") -> None: - self.logger.info("Alignment steps for %s", mission) - align_netcdf = Align_NetCDF() - align_netcdf.args = self._create_child_namespace( - auv_name=self.auv_name, mission=mission, plot=None + self.logger.info("Alignment steps for %s", mission or log_file) + align_netcdf = Align_NetCDF( + auv_name=self.auv_name, + mission=mission, + base_path=self.config["base_path"], + log_file=log_file, + plot=None, + verbose=self.config["verbose"], + commandline=self.commandline, ) - align_netcdf.logger.setLevel(self._log_levels[self.config["verbose"]]) align_netcdf.logger.addHandler(self.log_handler) - align_netcdf.commandline = self.commandline try: if log_file: - netcdf_dir = align_netcdf.process_combined(log_file=log_file) - align_netcdf.write_combined_netcdf(netcdf_dir, log_file=log_file) + netcdf_dir = align_netcdf.process_combined() + align_netcdf.write_combined_netcdf(netcdf_dir) else: netcdf_dir = align_netcdf.process_cal() - align_netcdf.write_combined_netcdf(netcdf_dir, vehicle=self.auv_name) + align_netcdf.write_combined_netcdf(netcdf_dir) except (FileNotFoundError, EOFError) as e: align_netcdf.logger.error("%s %s", mission, e) # noqa: TRY400 error_message = f"{mission} {e}" @@ -529,23 +548,29 @@ def align(self, mission: str = "", log_file: str = "") -> None: def resample(self, mission: str = "") -> None: self.logger.info("Resampling steps for %s", mission) - resamp = Resampler() - resamp.args = self._create_child_namespace( - auv_name=self.auv_name, mission=mission, plot=None + resamp = Resampler( + auv_name=self.auv_name, + mission=mission, + log_file=self.config["log_file"], + freq=self.config["freq"], + mf_width=self.config["mf_width"], + flash_threshold=self.config["flash_threshold"], + verbose=self.config["verbose"], + plot=None, + commandline=self.commandline, ) - resamp.commandline = self.commandline resamp.logger.setLevel(self._log_levels[self.config["verbose"]]) resamp.logger.addHandler(self.log_handler) - file_name = f"{resamp.args.auv_name}_{resamp.args.mission}_align.nc" - if resamp.args.log_file: - netcdfs_dir = Path(BASE_LRAUV_PATH, Path(resamp.args.log_file).parent) - nc_file = Path(netcdfs_dir, f"{Path(resamp.args.log_file).stem}_align.nc") + file_name = f"{resamp.auv_name}_{resamp.mission}_align.nc" + if resamp.log_file: + netcdfs_dir = Path(BASE_LRAUV_PATH, Path(resamp.log_file).parent) + nc_file = Path(netcdfs_dir, f"{Path(resamp.log_file).stem}_align.nc") else: nc_file = Path( self.config["base_path"], - resamp.args.auv_name, + resamp.auv_name, MISSIONNETCDFS, - resamp.args.mission, + resamp.mission, file_name, ) if self.config["flash_threshold"] and self.config["resample"]: @@ -556,7 +581,7 @@ def resample(self, mission: str = "") -> None: dap_file_str = os.path.join( # noqa: PTH118 AUVCTD_OPENDAP_BASE.replace("opendap/", ""), "surveys", - resamp.args.mission.split(".")[0], + resamp.mission.split(".")[0], "netcdf", file_name, ) @@ -587,30 +612,39 @@ def archive( If mission is provided, archive the processed data for Dorado class vehicles. If log_file is provided, archive the processed data for LRAUV class vehicles.""" - arch = Archiver(add_logger_handlers) - arch.args = self._create_child_namespace(auv_name=self.auv_name, mission=mission) + arch = Archiver( + add_handlers=add_logger_handlers, + auv_name=self.auv_name, + mission=mission, + clobber=self.config["clobber"], + resample=self.config["resample"], + flash_threshold=self.config["flash_threshold"], + archive_only_products=self.config["archive_only_products"], + create_products=self.config["create_products"], + verbose=self.config["verbose"], + commandline=self.commandline, + ) arch.mount_dir = self.mount_dir - arch.commandline = self.commandline arch.logger.setLevel(self._log_levels[self.config["verbose"]]) if add_logger_handlers: arch.logger.addHandler(self.log_handler) if mission: # Dorado class vehicle archiving self.logger.info("Archiving steps for %s", mission) - file_name_base = f"{arch.args.auv_name}_{arch.args.mission}" + file_name_base = f"{arch.auv_name}_{arch.mission}" nc_file_base = Path( BASE_PATH, - arch.args.auv_name, + arch.auv_name, MISSIONNETCDFS, - arch.args.mission, + arch.mission, file_name_base, ) self.logger.info("nc_file_base = %s, BASE_PATH = %s", nc_file_base, BASE_PATH) if str(BASE_PATH).startswith(("/home/runner/", "/root")): arch.logger.info( "Not archiving %s %s to AUVCTD as it's likely CI testing", - arch.args.auv_name, - arch.args.mission, + arch.auv_name, + arch.mission, ) else: arch.copy_to_AUVTCD(nc_file_base, self.config["freq"]) @@ -623,16 +657,21 @@ def archive( arch.logger.removeHandler(self.log_handler) def create_products(self, mission: str) -> None: - cp = CreateProducts() - cp.args = self._create_child_namespace( - auv_name=self.auv_name, mission=mission, start_esecs=None + cp = CreateProducts( + auv_name=self.auv_name, + mission=mission, + base_path=str(self.config["base_path"]), + start_esecs=None, + local=self.config["local"], + verbose=self.config["verbose"], + commandline=self.commandline, ) cp.logger.setLevel(self._log_levels[self.config["verbose"]]) cp.logger.addHandler(self.log_handler) # cp.plot_biolume() # cp.plot_2column() - if "dorado" in cp.args.auv_name.lower(): + if "dorado" in cp.auv_name.lower(): cp.gulper_odv() cp.logger.removeHandler(self.log_handler) @@ -925,9 +964,13 @@ def process_missions(self, start_year: int = None) -> None: def extract(self, log_file: str) -> None: self.logger.info("Extracting log file: %s", log_file) - extract = Extract() - extract.args = self._create_child_namespace() - extract.commandline = self.commandline + extract = Extract( + log_file=log_file, + plot_time=False, + filter_monotonic_time=True, + verbose=self.config["verbose"], + commandline=self.commandline, + ) extract.logger.setLevel(self._log_levels[self.config["verbose"]]) extract.logger.addHandler(self.log_handler) @@ -943,14 +986,17 @@ def combine(self, log_file: str) -> None: "Equivalent to the calibrate step for Dorado class vehicles. " "Adds nudge positions and more layers of quality control." ) - combine = Combine_NetCDF() - combine.args = self._create_child_namespace(plot=None) - combine.commandline = self.commandline + combine = Combine_NetCDF( + log_file=log_file, + verbose=self.config["verbose"], + plot=None, + commandline=self.commandline, + ) combine.logger.setLevel(self._log_levels[self.config["verbose"]]) combine.logger.addHandler(self.log_handler) - combine.combine_groups(log_file=log_file) - combine.write_netcdf(log_file=log_file) + combine.combine_groups() + combine.write_netcdf() @log_file_processor def process_log_file(self, log_file: str) -> None: diff --git a/src/data/resample.py b/src/data/resample.py index 0fb6a7a0..a8a0750e 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -51,7 +51,41 @@ class Resampler: logger.addHandler(_handler) _log_levels = (logging.WARN, logging.INFO, logging.DEBUG) - def __init__(self) -> None: + def __init__( # noqa: PLR0913 + self, + auv_name: str = None, + mission: str = None, + log_file: str = None, + freq: str = FREQ, + mf_width: int = MF_WIDTH, + flash_threshold: float = None, + verbose: int = 0, + plot: bool = None, # noqa: FBT001 + commandline: str = "", + ) -> None: + """Initialize Resampler with explicit parameters. + + Args: + auv_name: Name of the AUV vehicle + mission: Mission identifier + log_file: Log file path (for LRAUV processing) + freq: Resampling frequency (default: '1S') + mf_width: Median filter width (default: 3) + flash_threshold: Flash detection threshold + verbose: Verbosity level (0-2) + plot: Enable plotting + commandline: Command line string for tracking + """ + self.auv_name = auv_name + self.mission = mission + self.log_file = log_file + self.freq = freq + self.mf_width = mf_width + self.flash_threshold = flash_threshold + self.verbose = verbose + self.plot = plot + self.commandline = commandline + plt.rcParams["figure.figsize"] = (15, 5) self.resampled_nc = xr.Dataset() iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z" @@ -134,7 +168,7 @@ def _build_global_metadata(self) -> None: self.metadata["summary"] = ( f"Observational oceanographic data obtained from an Autonomous" f" Underwater Vehicle mission with measurements sampled at" - f" {self.args.freq} intervals." + f" {self.freq} intervals." f" Data processed at {iso_now} using MBARI's auv-python software." ) @@ -144,41 +178,41 @@ def dorado_global_metadata(self) -> dict: """ self.metadata["title"] = "Calibrated, " try: - if dorado_info[self.args.mission].get("program"): + if dorado_info[self.mission].get("program"): self.metadata["title"] = ( - f"{dorado_info[self.args.mission]['program']} program - calibrated, " + f"{dorado_info[self.mission]['program']} program - calibrated, " ) except KeyError: self.logger.warning( "No entry for for mission %s program in dorado_info.py", - self.args.mission, + self.mission, ) self.metadata["title"] += ( f"aligned, and resampled AUV sensor data from" - f" {self.args.auv_name} mission {self.args.mission}" + f" {self.auv_name} mission {self.mission}" ) try: self.metadata["summary"] += ( f" Processing log file: {AUVCTD_OPENDAP_BASE}/surveys/" - f"{self.args.mission.split('.')[0]}/netcdf/" - f"{self.args.auv_name}_{self.args.mission}_processing.log" + f"{self.mission.split('.')[0]}/netcdf/" + f"{self.auv_name}_{self.mission}_processing.log" ) except KeyError: # Likely no _1S.nc file was created, hence no summary to append to self.logger.warning( "Could not add processing log file to summary matadata for mission %s", - self.args.mission, + self.mission, ) try: - if dorado_info[self.args.mission].get("program"): - self.metadata["program"] = dorado_info[self.args.mission].get("program") - if dorado_info[self.args.mission].get("comment"): - self.metadata["comment"] = dorado_info[self.args.mission].get("comment") + if dorado_info[self.mission].get("program"): + self.metadata["program"] = dorado_info[self.mission].get("program") + if dorado_info[self.mission].get("comment"): + self.metadata["comment"] = dorado_info[self.mission].get("comment") except KeyError: self.logger.warning( "No entry for for mission %s program or comment in dorado_info.py", - self.args.mission, + self.mission, ) try: # Parse from ctd1_depth comment: "using SensorOffset(x=1.003, y=0.0001)" @@ -189,7 +223,7 @@ def dorado_global_metadata(self) -> dict: except KeyError: self.logger.warning( "No comment for pitch correction in ctd1_depth for mission %s", - self.args.mission, + self.mission, ) return self.metadata @@ -200,7 +234,7 @@ def i2map_global_metadata(self) -> dict: """ self.metadata["title"] = ( f"Calibrated, aligned, and resampled AUV sensor data from" - f" {self.args.auv_name} mission {self.args.mission}" + f" {self.auv_name} mission {self.mission}" ) # Append location of original data files to summary matches = re.search( @@ -212,8 +246,8 @@ def i2map_global_metadata(self) -> dict: " " + matches.group(1) + f". Processing log file: {AUVCTD_OPENDAP_BASE}/surveys/" - + f"{self.args.mission.split('.')[0]}/netcdf/" - + f"{self.args.auv_name}_{self.args.mission}_processing.log" + + f"{self.mission.split('.')[0]}/netcdf/" + + f"{self.auv_name}_{self.mission}_processing.log" ) # Append shortened location of original data files to title # Useful for I2Map data as it's in a YYYY/MM directory structure @@ -235,7 +269,7 @@ def i2map_global_metadata(self) -> dict: except KeyError: self.logger.warning( "No entry for for mission %s comment in dorado_info.py", - self.args.mission, + self.mission, ) return self.metadata @@ -272,12 +306,12 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None: self.logger.warning( "Variable %s_depth not found in %s align.nc file", instr, - self.args.mission, + self.mission, ) self.logger.info( "Cannot continue without a pitch corrected depth coordinate", ) - msg = f"{instr}_depth not found in {self.args.auv_name}_{self.args.mission}_align.nc" + msg = f"{instr}_depth not found in {self.auv_name}_{self.mission}_align.nc" raise InvalidAlignFile(msg) from None try: self.df_o[f"{instr}_latitude"] = self.ds[f"{instr}_latitude"].to_pandas() @@ -285,7 +319,7 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None: except KeyError: msg = ( f"Variable {instr}_latitude or {instr}_longitude not found in " - f"{self.args.mission} align.nc file" + f"{self.mission} align.nc file" ) self.logger.warning(msg) raise InvalidAlignFile(msg) from None @@ -357,8 +391,8 @@ def save_coordinates( self.resampled_nc["depth"].attrs = self.ds[f"{instr}_depth"].attrs self.resampled_nc["depth"].attrs["comment"] += ( f". {self.ds[f'{instr}_depth'].attrs['comment']}" - f" mean sampled at {self.args.freq} intervals following" - f" {self.args.mf_width} point median filter." + f" mean sampled at {self.freq} intervals following" + f" {self.mf_width} point median filter." ) self.resampled_nc["latitude"].attrs = self.ds[f"{instr}_latitude"].attrs self.resampled_nc["latitude"].attrs["comment"] += ( @@ -602,8 +636,8 @@ def add_biolume_proxies( # noqa: PLR0913, PLR0915 peaks, _ = signal.find_peaks(s_biolume_raw, height=max_bg) s_peaks = pd.Series(s_biolume_raw.iloc[peaks], index=s_biolume_raw.index[peaks]) s_med_bg_peaks = pd.Series(s_med_bg.iloc[peaks], index=s_biolume_raw.index[peaks]) - if self.args.flash_threshold: - flash_threshold = self.args.flash_threshold + if self.flash_threshold: + flash_threshold = self.flash_threshold flash_threshold_note = f"Computed with flash_threshold = {flash_threshold:.0e}" self.logger.info("Using flash_threshold = %.4e", flash_threshold) nbflash_high = s_peaks[s_peaks > (s_med_bg_peaks + flash_threshold)] @@ -1224,7 +1258,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 # nosecone instruments. If we are processing LRAUV data then # use 'ctddseabird', otherwise start with 'ctd1' and fall back to # 'seabird25p' if needed for i2map missions. - pitch_corrected_instr = "ctdseabird" if self.args.log_file else "ctd1" + pitch_corrected_instr = "ctdseabird" if self.log_file else "ctd1" if f"{pitch_corrected_instr}_depth" not in self.ds: pitch_corrected_instr = "seabird25p" if pitch_corrected_instr in instrs_to_pad: @@ -1237,7 +1271,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 freq, ) self.save_coordinates(instr, mf_width, freq, aggregator) - if self.args.plot: + if self.plot: self.plot_coordinates(instr, freq, plot_seconds) self.add_profile(depth_threshold=depth_threshold) if instr != last_instr: @@ -1293,7 +1327,7 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 f" median filtered with {mf_width} samples" f" and resampled with {aggregator} to {freq} intervals." ) - if self.args.plot: + if self.plot: self.plot_variable(instr, variable, freq, plot_seconds) try: self._build_global_metadata() @@ -1304,18 +1338,18 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 e, # noqa: TRY401 nc_file, ) - if self.args.auv_name.lower() == "dorado": + if self.auv_name.lower() == "dorado": self.resampled_nc.attrs = self.dorado_global_metadata() - elif self.args.auv_name.lower() == "i2map": + elif self.auv_name.lower() == "i2map": self.resampled_nc.attrs = self.i2map_global_metadata() self.resampled_nc["time"].attrs = { "standard_name": "time", "long_name": "Time (UTC)", } out_fn = str(nc_file).replace("_align.nc", f"_{freq}.nc") - if self.args.flash_threshold and self.args.flash_threshold != FLASH_THRESHOLD: + if self.flash_threshold and self.flash_threshold != FLASH_THRESHOLD: # Append flash_threshold to output filename - ft_ending = f"_ft{self.args.flash_threshold:.0E}.nc".replace("E+", "E") + ft_ending = f"_ft{self.flash_threshold:.0E}.nc".replace("E+", "E") out_fn = out_fn.replace(".nc", ft_ending) self.resampled_nc.to_netcdf(path=out_fn, format="NETCDF4_CLASSIC") self.logger.info("Saved resampled mission to %s", out_fn) @@ -1347,7 +1381,7 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.args.verbose]) + self.logger.setLevel(self._log_levels[self.verbose]) self.commandline = " ".join(sys.argv) diff --git a/src/data/usblToNetCDF.py b/src/data/usblToNetCDF.py index ff3324b8..007ec8a6 100755 --- a/src/data/usblToNetCDF.py +++ b/src/data/usblToNetCDF.py @@ -1,7 +1,7 @@ #!/usr/bin/env python __author__ = "Mike McCann" -__version__ = "$Revision: 1.2 $".split()[1] -__date__ = "$Date: 2010/08/24 18:58:19 $".split()[1] +__version__ = ["$Revision:", "1.2", "$"][1] +__date__ = ["$Date:", "2010/08/24", "18:58:19", "$"][1] __copyright__ = "2009" __license__ = "GPL v3" __contact__ = "mccann at mbari.org" From e3140e633f9859460379181a4b667666c4fd5715 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 13:34:22 -0800 Subject: [PATCH 095/121] Reformat lines. --- src/data/calibrate.py | 8 ++------ src/data/resample.py | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 68be6a61..7ea7f9b2 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -1993,9 +1993,7 @@ def _depth_process(self, sensor, latitude=36, cutoff_freq=1): # noqa: PLR0915 df_plot["pres"] = pres[pbeg:pend] df_plot["depth_filtpres_butter"] = depth_filtpres_butter[pbeg:pend] df_plot["depth_filtpres_boxcar"] = depth_filtpres_boxcar[pbeg:pend] - title = ( - f"First {pend} points from" f" {self.mission}/{self.sinfo[sensor]['data_filename']}" - ) + title = f"First {pend} points from {self.mission}/{self.sinfo[sensor]['data_filename']}" ax = df_plot.plot(title=title, figsize=(18, 6)) ax.grid("on") self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) @@ -2194,9 +2192,7 @@ def _hs2_process(self, sensor, logs_dir): # noqa: C901, PLR0912, PLR0915 df_plot["blue_bs"] = blue_bs[pbeg:pend] df_plot["red_bs"] = red_bs[pbeg:pend] ## df_plot["fl"] = fl[pbeg:pend] - title = ( - f"First {pend} points from" f" {self.mission}/{self.sinfo[sensor]['data_filename']}" - ) + title = f"First {pend} points from {self.mission}/{self.sinfo[sensor]['data_filename']}" ax = df_plot.plot(title=title, figsize=(18, 6), ylim=(-0.003, 0.004)) ax.grid("on") self.logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) diff --git a/src/data/resample.py b/src/data/resample.py index a8a0750e..f37182a5 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -188,8 +188,7 @@ def dorado_global_metadata(self) -> dict: self.mission, ) self.metadata["title"] += ( - f"aligned, and resampled AUV sensor data from" - f" {self.auv_name} mission {self.mission}" + f"aligned, and resampled AUV sensor data from {self.auv_name} mission {self.mission}" ) try: self.metadata["summary"] += ( From 4c8593be044ba13d08ad8b5b915d380a75c7a680 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 16:54:23 -0800 Subject: [PATCH 096/121] Add _align_root_group_coordinates() and fix --plot_time. --- src/data/nc42netcdfs.py | 96 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 3 deletions(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 01e49373..73411bfc 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -171,7 +171,7 @@ class Extract: def __init__( # noqa: PLR0913 self, log_file: str = None, - plot_time: bool = False, # noqa: FBT001, FBT002 + plot_time: str = None, filter_monotonic_time: bool = True, # noqa: FBT001, FBT002 verbose: int = 0, commandline: str = "", @@ -180,7 +180,7 @@ def __init__( # noqa: PLR0913 Args: log_file: Log file path for processing - plot_time: Enable time plotting + plot_time: Optional plot time specification (e.g., /latitude_time) filter_monotonic_time: Filter out non-monotonic time values verbose: Verbosity level (0-2) commandline: Command line string for tracking @@ -358,6 +358,10 @@ def _get_time_filters_for_variables( ) time_filters[time_coord_name] = time_filter + # Align latitude and longitude in root group if needed + if group_name == "/": + time_filters = self._align_root_group_coordinates(time_filters, vars_to_extract) + return time_filters def _analyze_original_time_coordinates( @@ -934,6 +938,86 @@ def _calculate_dimension_size( self.logger.debug("Created fixed dimension %s: %s", dim_name, size) return size + def _align_root_group_coordinates( + self, time_filters: dict[str, dict], vars_to_extract: list[str] + ) -> dict[str, dict]: + """Align latitude and longitude indices in root group when they have different lengths. + + When time coordinate filtering removes different numbers of points from latitude_time + and longitude_time, we need to use the union of both filtered indices to keep them + aligned. + + Args: + time_filters: Dictionary mapping time coordinate names to filter info + vars_to_extract: List of variable names being extracted + + Returns: + Modified time_filters with aligned indices for latitude and longitude + """ + # Only apply to root group variables + lat_vars = [v for v in vars_to_extract if v.startswith("latitude")] + lon_vars = [v for v in vars_to_extract if v.startswith("longitude")] + + if not lat_vars or not lon_vars: + return time_filters + + # Find the time coordinates for latitude and longitude + lat_time_coords = [f"{v}_time" for v in lat_vars] + lon_time_coords = [f"{v}_time" for v in lon_vars] + + # Get the filtered time coordinates that exist + lat_filtered = [ + tc for tc in lat_time_coords if tc in time_filters and time_filters[tc]["filtered"] + ] + lon_filtered = [ + tc for tc in lon_time_coords if tc in time_filters and time_filters[tc]["filtered"] + ] + + if not lat_filtered or not lon_filtered: + return time_filters + + # For simplicity, handle the common case of single lat/lon time coordinates + if len(lat_filtered) == 1 and len(lon_filtered) == 1: + lat_tc = lat_filtered[0] + lon_tc = lon_filtered[0] + + # Use numpy arrays for efficient intersection - indices are already lists + lat_indices = np.array(time_filters[lat_tc]["indices"], dtype=np.int64) + lon_indices = np.array(time_filters[lon_tc]["indices"], dtype=np.int64) + + # Quick check if they're already identical using numpy comparison + if lat_indices.shape == lon_indices.shape and np.array_equal(lat_indices, lon_indices): + return time_filters + + # Use numpy's intersect1d for efficient intersection of sorted arrays + # assume_unique=True since indices come from filtered time coordinates + aligned_indices = np.intersect1d(lat_indices, lon_indices, assume_unique=True) + + if len(aligned_indices) < len(lat_indices) or len(aligned_indices) < len(lon_indices): + self.logger.info( + "Aligning root group coordinates: latitude has %d points, " + "longitude has %d points, using %d common indices", + len(lat_indices), + len(lon_indices), + len(aligned_indices), + ) + + # Convert back to list for consistency with the rest of the code + aligned_list = aligned_indices.tolist() + + # Update both time filters with aligned indices + time_filters[lat_tc]["indices"] = aligned_list + time_filters[lon_tc]["indices"] = aligned_list + + # Update comments to reflect alignment + alignment_note = " Aligned with longitude/latitude." + if not time_filters[lat_tc]["comment"].endswith(alignment_note): + time_filters[lat_tc]["comment"] += alignment_note + if not time_filters[lon_tc]["comment"].endswith(alignment_note): + time_filters[lon_tc]["comment"] += alignment_note + + return time_filters + def _create_netcdf_file( # noqa: PLR0913 self, log_file: str, @@ -1124,8 +1208,14 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.verbose]) + + # Set instance attributes from parsed arguments + self.log_file = self.args.log_file + self.plot_time = self.args.plot_time + self.filter_monotonic_time = self.args.filter_monotonic_time + self.verbose = self.args.verbose self.commandline = " ".join(sys.argv) + self.logger.setLevel(self._log_levels[self.verbose]) if __name__ == "__main__": From 2e0be4bce972e8c608f197c17022f9071d1e6ab3 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 16:56:30 -0800 Subject: [PATCH 097/121] Add ctdneilbrown for early tethys log_files, other resample() fixes. --- src/data/process.py | 8 ++++---- src/data/resample.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/data/process.py b/src/data/process.py index 8977425b..f672d83b 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -546,12 +546,12 @@ def align(self, mission: str = "", log_file: str = "") -> None: finally: align_netcdf.logger.removeHandler(self.log_handler) - def resample(self, mission: str = "") -> None: + def resample(self, mission: str = "", log_file: str = "") -> None: self.logger.info("Resampling steps for %s", mission) resamp = Resampler( auv_name=self.auv_name, mission=mission, - log_file=self.config["log_file"], + log_file=log_file, freq=self.config["freq"], mf_width=self.config["mf_width"], flash_threshold=self.config["flash_threshold"], @@ -597,7 +597,7 @@ def resample(self, mission: str = "") -> None: subprocess.run([wget_path, dap_file_str, "-O", nc_file_str], check=True) # noqa: S603 try: resamp.resample_mission(nc_file) - except FileNotFoundError as e: + except (FileNotFoundError, InvalidAlignFile) as e: self.logger.error("%s %s", nc_file, e) # noqa: TRY400 finally: resamp.logger.removeHandler(self.log_handler) @@ -1016,7 +1016,7 @@ def process_log_file(self, log_file: str) -> None: netcdfs_dir = self.extract(log_file) self.combine(log_file=log_file) self.align(log_file=log_file) - self.resample() + self.resample(log_file=log_file) # self.create_products(log_file) self.logger.info("Finished processing log file: %s", log_file) diff --git a/src/data/resample.py b/src/data/resample.py index f37182a5..a5679bd1 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -310,7 +310,10 @@ def resample_coordinates(self, instr: str, mf_width: int, freq: str) -> None: self.logger.info( "Cannot continue without a pitch corrected depth coordinate", ) - msg = f"{instr}_depth not found in {self.auv_name}_{self.mission}_align.nc" + if self.log_file: + msg = f"A CTD depth was not found in {self.ds.encoding['source']}" + else: + msg = f"{instr}_depth not found in {self.auv_name}_{self.mission}_align.nc" raise InvalidAlignFile(msg) from None try: self.df_o[f"{instr}_latitude"] = self.ds[f"{instr}_latitude"].to_pandas() @@ -1256,8 +1259,11 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 # must be as complete as possible as it's used for all the other # nosecone instruments. If we are processing LRAUV data then # use 'ctddseabird', otherwise start with 'ctd1' and fall back to - # 'seabird25p' if needed for i2map missions. + # 'seabird25p' if needed for i2map missions. Early LRAUV missions + # had only CTD_NeilBrown instruments, later ones had CTD_Seabird. pitch_corrected_instr = "ctdseabird" if self.log_file else "ctd1" + if f"{pitch_corrected_instr}_depth" not in self.ds: + pitch_corrected_instr = "ctdneilbrown" if f"{pitch_corrected_instr}_depth" not in self.ds: pitch_corrected_instr = "seabird25p" if pitch_corrected_instr in instrs_to_pad: From add6b51fefcd06b3394fc515b9aa34de9e7add34 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 24 Nov 2025 16:57:19 -0800 Subject: [PATCH 098/121] Set args for running combine.py individually. --- .vscode/launch.json | 9 ++++++--- src/data/combine.py | 16 +++++++++++----- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 82e4948c..bcdd4b44 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -61,9 +61,11 @@ //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/longitude_time"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--plot_time", "/latitude_time"] // brizo 20250916T230652 has several ESP Samples from stoqs_lrauv_sep2025 - "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn.nc4", "--plot_time", "/longitude_time"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn_latlon.nc4", "--plot_time", "/longitude_time"] + // Conflicting sizes for nudged_time and data + "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot_time", "/longitude_time"] }, { "name": "2.0 - calibrate.py", @@ -332,13 +334,14 @@ "request": "launch", "program": "${workspaceFolder}/src/data/process_lrauv.py", "console": "integratedTerminal", + // Lots bad time values in brizo 20250914T080941 due to memory corruption on the vehicle //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] - //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] // Has different universals time coodinates for longitude/latitude and depth - "args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] // Conflicting sizes for nudged_time and data //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--no_cleanup" //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] diff --git a/src/data/combine.py b/src/data/combine.py index cc63b555..352a40a3 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -93,7 +93,7 @@ class Combine_NetCDF: def __init__( self, - log_file: str, + log_file: str = None, verbose: int = 0, plot: str = None, commandline: str = "", @@ -101,7 +101,7 @@ def __init__( """Initialize Combine_NetCDF with explicit parameters. Args: - log_file: LRAUV log file path for processing + log_file: LRAUV log file path for processing (required for processing, optional for CLI) verbose: Verbosity level (0=WARN, 1=INFO, 2=DEBUG) plot: Optional plot specification commandline: Command line string for metadata @@ -110,7 +110,8 @@ def __init__( self.verbose = verbose self.plot = plot self.commandline = commandline - self.logger.setLevel(self._log_levels[verbose]) + if verbose: + self.logger.setLevel(self._log_levels[verbose]) def global_metadata(self): """Use instance variables to return a dictionary of @@ -472,7 +473,7 @@ def _consolidate_group_time_coords(self, ds: xr.Dataset, group_name: str) -> dic # Time coordinates differ - keep them separate time_coord_mapping = {name: f"{group_name}_{name.lower()}" for name in time_vars} - self.logger.warning( + self.logger.info( "Group %s: Time coordinates differ - keeping separate: %s", group_name, list(time_vars.keys()), @@ -804,8 +805,13 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.verbose]) + + # Set instance attributes from parsed arguments + self.log_file = self.args.log_file + self.verbose = self.args.verbose + self.plot = "--plot" if self.args.plot else None self.commandline = " ".join(sys.argv) + self.logger.setLevel(self._log_levels[self.verbose]) if __name__ == "__main__": From a3e3487fa045ef0ea64eace6e74a18882bfde99e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 25 Nov 2025 11:33:17 -0800 Subject: [PATCH 099/121] Metadata fixups: Add deployment name, specific summary info, processing.log link. --- .vscode/launch.json | 7 ++-- src/data/align.py | 28 ++++++++++------ src/data/combine.py | 31 +++++++++++++++-- src/data/nc42netcdfs.py | 10 +++++- src/data/resample.py | 73 ++++++++++++++++++++++++++++++++++------- src/data/utils.py | 52 +++++++++++++++++++++++++++++ 6 files changed, 173 insertions(+), 28 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bcdd4b44..2cc42ade 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -124,6 +124,9 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] + // Conflicting sizes for nudged_time and data + //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"] + }, { "name": "3.0 - align.py", @@ -338,8 +341,8 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] - //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] + "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] // Has different universals time coodinates for longitude/latitude and depth //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] // Conflicting sizes for nudged_time and data diff --git a/src/data/align.py b/src/data/align.py index 497d47fd..3b951201 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -31,6 +31,7 @@ from common_args import get_standard_lrauv_parser from logs2netcdfs import AUV_NetCDF, MISSIONNETCDFS, SUMMARY_SOURCE, TIME, TIME60HZ from nc42netcdfs import BASE_LRAUV_PATH +from utils import get_deployment_name class InvalidCalFile(Exception): @@ -148,9 +149,13 @@ def global_metadata(self) -> dict: # noqa: PLR0915 " software." ) elif log_file: - metadata["title"] = ( - f"Combined and aligned LRAUV instrument data from log file {Path(log_file)}" - ) + # Build title with optional deployment name + title = f"Combined and aligned LRAUV instrument data from log file {Path(log_file)}" + deployment_name = get_deployment_name(log_file, BASE_LRAUV_PATH, self.logger) + if deployment_name: + title += f" - Deployment: {deployment_name}" + metadata["title"] = title + from_data = "combined data" metadata["source"] = ( f"MBARI Long Range AUV data produced from {from_data}" @@ -158,12 +163,15 @@ def global_metadata(self) -> dict: # noqa: PLR0915 f" host {actual_hostname} using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) - metadata["summary"] = ( - "Observational oceanographic data obtained from an Autonomous" - " Underwater Vehicle mission with measurements at" - " original sampling intervals. The position variables have been" - " corrected to GPS positions and aligned with the data variables" - " using MBARI's auv-python software." + metadata["summary"] = self.combined_nc.attrs.get( + "summary", + ( + "Observational oceanographic data obtained from an Autonomous" + " Underwater Vehicle mission with measurements at" + " original sampling intervals. The position variables have been" + " corrected to GPS positions and aligned with the data variables" + " using MBARI's auv-python software." + ), ) # Append location of original data files to summary if self.auv_name and self.mission: @@ -678,7 +686,7 @@ def write_combined_netcdf(self, netcdfs_dir: Path) -> None: self.logger.debug("Removing existing file %s", out_fn) out_fn.unlink() self.aligned_nc.to_netcdf(out_fn) - self.logger.info( + self.logger.debug( "Data variables written: %s", ", ".join(sorted(self.aligned_nc.variables)), ) diff --git a/src/data/combine.py b/src/data/combine.py index 352a40a3..5ec13ed5 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -55,6 +55,7 @@ from common_args import get_standard_lrauv_parser from logs2netcdfs import AUV_NetCDF, TIME, TIME60HZ from nc42netcdfs import BASE_LRAUV_PATH, GROUP +from utils import get_deployment_name AVG_SALINITY = 33.6 # Typical value for upper 100m of Monterey Bay @@ -110,6 +111,8 @@ def __init__( self.verbose = verbose self.plot = plot self.commandline = commandline + self.nudge_segment_count = None + self.nudge_total_minutes = None if verbose: self.logger.setLevel(self._log_levels[verbose]) @@ -144,9 +147,14 @@ def global_metadata(self): metadata["history"] = f"Created by {self.commandline} on {iso_now}" metadata["variable_time_coord_mapping"] = json.dumps(self.variable_time_coord_mapping) log_file = self.log_file - metadata["title"] = ( - f"Combined LRAUV data from {log_file} - relevant variables extracted for STOQS" - ) + + # Build title with optional deployment name + title = f"Combined LRAUV data from {log_file}" + deployment_name = get_deployment_name(log_file, BASE_LRAUV_PATH, self.logger) + if deployment_name: + title += f" - Deployment: {deployment_name}" + metadata["title"] = title + metadata["summary"] = ( "Observational oceanographic data obtained from a Long Range Autonomous" " Underwater Vehicle mission with measurements at" @@ -156,6 +164,14 @@ def global_metadata(self): if self.summary_fields: # Should be just one item in set, but just in case join them metadata["summary"] += " " + ". ".join(self.summary_fields) + + # Add nudging information to summary if available + if self.nudge_segment_count is not None and self.nudge_total_minutes is not None: + metadata["summary"] += ( + f" {self.nudge_segment_count} underwater segments over " + f"{self.nudge_total_minutes:.1f} minutes nudged toward GPS fixes." + ) + metadata["comment"] = ( f"MBARI Long Range AUV data produced from original data" f" with execution of '{self.commandline}'' at {iso_now} on" @@ -665,6 +681,15 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: segment_count, segment_minsum, ) + + # Calculate total underwater time and store for metadata + time_coord = self.combined_nc[self.variable_time_coord_mapping["universals_longitude"]] + time_diff = time_coord.to_numpy()[-1] - time_coord.to_numpy()[0] + # Convert timedelta64 to seconds (handles nanosecond precision) + total_seconds = float(time_diff / np.timedelta64(1, "s")) + self.nudge_segment_count = segment_count + self.nudge_total_minutes = total_seconds / 60.0 + self.combined_nc["nudged_longitude"] = xr.DataArray( nudged_longitude, coords=[ diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index 73411bfc..b188c234 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -20,6 +20,7 @@ import numpy as np import pooch from common_args import get_standard_lrauv_parser +from utils import get_deployment_name # Conditional imports for plotting (only when needed) try: @@ -1121,7 +1122,14 @@ def global_metadata(self, log_file: str, group_name: str): metadata["useconst"] = "Not intended for legal use. Data may contain inaccuracies." metadata["history"] = f"Created by {self.commandline} on {iso_now}" log_file = self.log_file - metadata["title"] = f"Extracted LRAUV data from {log_file}, Group: {group_name}" + + # Build title with optional deployment name + title = f"Extracted LRAUV data from {log_file}, Group: {group_name}" + deployment_name = get_deployment_name(log_file, BASE_LRAUV_PATH, self.logger) + if deployment_name: + title += f" - Deployment: {deployment_name}" + metadata["title"] = title + metadata["source"] = ( f"MBARI LRAUV data extracted from {log_file}" f" with execution of '{self.commandline}' at {iso_now}" diff --git a/src/data/resample.py b/src/data/resample.py index a5679bd1..85a0d6f6 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -30,7 +30,7 @@ from common_args import get_standard_lrauv_parser from dorado_info import dorado_info from logs2netcdfs import AUV_NetCDF, BASE_PATH, MISSIONNETCDFS, SUMMARY_SOURCE, TIME -from nc42netcdfs import BASE_LRAUV_PATH +from nc42netcdfs import BASE_LRAUV_PATH, BASE_LRAUV_WEB MF_WIDTH = 3 FREQ = "1S" @@ -174,8 +174,13 @@ def _build_global_metadata(self) -> None: def dorado_global_metadata(self) -> dict: """Use instance variables to return a dictionary of - metadata specific for the data that are written + metadata specific for the data that are written. + Calls _build_global_metadata() first to populate common metadata. """ + # First populate common metadata (git commit, host, geospatial bounds, etc.) + self._build_global_metadata() + + # Then add dorado-specific metadata self.metadata["title"] = "Calibrated, " try: if dorado_info[self.mission].get("program"): @@ -229,13 +234,19 @@ def dorado_global_metadata(self) -> dict: def i2map_global_metadata(self) -> dict: """Use instance variables to return a dictionary of - metadata specific for the data that are written + metadata specific for the data that are written. + Calls _build_global_metadata() first to populate common metadata. """ + # First populate common metadata (git commit, host, geospatial bounds, etc.) + self._build_global_metadata() + + # Then add i2map-specific metadata self.metadata["title"] = ( f"Calibrated, aligned, and resampled AUV sensor data from" f" {self.auv_name} mission {self.mission}" ) # Append location of original data files to summary + self.metadata["summary"] = self.ds.attrs.get matches = re.search( "(" + SUMMARY_SOURCE.replace("{}", r".+$") + ")", self.ds.attrs["summary"], @@ -273,6 +284,48 @@ def i2map_global_metadata(self) -> dict: return self.metadata + def lrauv_global_metadata(self) -> dict: + """Use instance variables to return a dictionary of + metadata specific for LRAUV data that are written. + Calls _build_global_metadata() first to populate common metadata. + """ + # First populate common metadata (git commit, host, geospatial bounds, etc.) + self._build_global_metadata() + + # Then add LRAUV-specific metadata + # Preserve title and summary from align.nc if available + if "title" in self.ds.attrs: + self.metadata["title"] = self.ds.attrs["title"].replace( + "Combined and aligned LRAUV", "Combined, Aligned, and Resampled LRAUV" + ) + else: + self.metadata["title"] = ( + f"Resampled LRAUV data from {self.log_file} at {self.freq} intervals" + ) + + if "summary" in self.ds.attrs: + self.metadata["summary"] = self.ds.attrs["summary"] + else: + self.metadata["summary"] = ( + "Observational oceanographic data obtained from a Long Range Autonomous " + "Underwater Vehicle mission. Data have been aligned and resampled." + ) + # Add resampling information and processing log file link to the summary + self.metadata["summary"] += ( + f" Data resampled to {self.freq} intervals following {self.mf_width} " + f"point median filter." + ) + self.metadata["summary"] += ( + f". Processing log file: {BASE_LRAUV_WEB}/" + f"{self.log_file.replace('.nc4', '_processing.log')}" + ) + + # Preserve comment from align.nc if available, otherwise use default + if "comment" in self.ds.attrs: + self.metadata["comment"] = self.ds.attrs["comment"] + + return self.metadata + def instruments_variables(self, nc_file: str) -> dict: """ Return a dictionary of all the variables in the mission netCDF file, @@ -1334,19 +1387,15 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 ) if self.plot: self.plot_variable(instr, variable, freq, plot_seconds) - try: - self._build_global_metadata() - except KeyError as e: - self.logger.exception( - "Missing global attribute %s in %s. Cannot add global metadata to " - "resampled mission.", - e, # noqa: TRY401 - nc_file, - ) + + # Call vehicle-specific metadata method which will call _build_global_metadata() if self.auv_name.lower() == "dorado": self.resampled_nc.attrs = self.dorado_global_metadata() elif self.auv_name.lower() == "i2map": self.resampled_nc.attrs = self.i2map_global_metadata() + else: + # Assume LRAUV for any other vehicle + self.resampled_nc.attrs = self.lrauv_global_metadata() self.resampled_nc["time"].attrs = { "standard_name": "time", "long_name": "Time (UTC)", diff --git a/src/data/utils.py b/src/data/utils.py index cbea29b8..c55783eb 100644 --- a/src/data/utils.py +++ b/src/data/utils.py @@ -23,7 +23,59 @@ """ +import logging import math +from pathlib import Path + + +def get_deployment_name( + log_file: str, base_lrauv_path: Path, logger: logging.Logger = None +) -> str | None: + """Parse deployment name from .dlist file in great-grandparent directory. + + Args: + log_file: Path to log file (e.g., tethys/missionlogs/2012/20120908_20120920/.../.nc4) + base_lrauv_path: Base path for local LRAUV data + logger: Optional logger for debug messages + + Returns: + Deployment name string or None if not found + """ + try: + log_path = Path(log_file) + # Get great-grandparent directory (e.g., tethys/missionlogs/2012) + great_grandparent_dir = log_path.parent.parent.parent + # The directory with the .dlist file (e.g., 20120908_20120920) + deployment_dir = log_path.parent.parent + # Construct .dlist filename from deployment directory name + dlist_filename = f"{deployment_dir.name}.dlist" + + # Try file share location first (/Volumes/LRAUV/vehicle/missionlogs/YYYY/...) + lrauv_share = Path("/Volumes/LRAUV") + dlist_path = lrauv_share / great_grandparent_dir / dlist_filename + + # If not on file share, try local base_lrauv_path + if not dlist_path.exists(): + dlist_path = Path(base_lrauv_path, great_grandparent_dir, dlist_filename) + + if not dlist_path.exists(): + if logger: + logger.debug("No .dlist file found at %s", dlist_path) + return None + + with dlist_path.open() as f: + first_line = f.readline().strip() + # Parse "# Deployment name: " (case insensitive) + if first_line.lower().startswith("# deployment name:"): + deployment_name = first_line.split(":", 1)[1].strip() + if logger: + logger.debug("Found deployment name: %s", deployment_name) + return deployment_name + return None + except (OSError, IndexError) as e: + if logger: + logger.debug("Error parsing deployment name: %s", e) + return None def simplify_points(pts, tolerance): From 6325a012f6846b10c6688df71f0359dc19f65c3b Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 25 Nov 2025 14:10:11 -0800 Subject: [PATCH 100/121] Skip writing dynamic metadata if running from pytest, add arguments needed for stand-alone execution. --- src/data/resample.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/data/resample.py b/src/data/resample.py index 85a0d6f6..0b8fe3eb 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -102,6 +102,9 @@ def _build_global_metadata(self) -> None: """ Call following saving of coordinates and variables from resample_mission() """ + # Skip dynamic metadata during testing to ensure reproducible results + if "pytest" in sys.modules: + return {} repo = git.Repo(search_parent_directories=True) try: gitcommit = repo.head.object.hexsha @@ -171,12 +174,17 @@ def _build_global_metadata(self) -> None: f" {self.freq} intervals." f" Data processed at {iso_now} using MBARI's auv-python software." ) + return None def dorado_global_metadata(self) -> dict: """Use instance variables to return a dictionary of metadata specific for the data that are written. Calls _build_global_metadata() first to populate common metadata. """ + # Skip dynamic metadata during testing to ensure reproducible results + if "pytest" in sys.modules: + return {} + # First populate common metadata (git commit, host, geospatial bounds, etc.) self._build_global_metadata() @@ -237,6 +245,10 @@ def i2map_global_metadata(self) -> dict: metadata specific for the data that are written. Calls _build_global_metadata() first to populate common metadata. """ + # Skip dynamic metadata during testing to ensure reproducible results + if "pytest" in sys.modules: + return {} + # First populate common metadata (git commit, host, geospatial bounds, etc.) self._build_global_metadata() @@ -289,6 +301,10 @@ def lrauv_global_metadata(self) -> dict: metadata specific for LRAUV data that are written. Calls _build_global_metadata() first to populate common metadata. """ + # Skip dynamic metadata during testing to ensure reproducible results + if "pytest" in sys.modules: + return {} + # First populate common metadata (git commit, host, geospatial bounds, etc.) self._build_global_metadata() @@ -1415,6 +1431,20 @@ def process_command_line(self): description=__doc__, ) + # Add resampling arguments (freq and mf_width) + parser.add_argument( + "--freq", + type=str, + default=FREQ, + help=f"Resampling frequency, default: {FREQ}", + ) + parser.add_argument( + "--mf_width", + type=int, + default=MF_WIDTH, + help=f"Median filter width for smoothing, default: {MF_WIDTH}", + ) + # Add resample-specific arguments parser.add_argument("--plot", action="store_true", help="Plot data") parser.add_argument( @@ -1435,8 +1465,18 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.verbose]) + + # Set instance attributes from parsed arguments + self.auv_name = self.args.auv_name + self.mission = self.args.mission + self.log_file = self.args.log_file + self.freq = self.args.freq + self.mf_width = self.args.mf_width + self.flash_threshold = self.args.flash_threshold + self.verbose = self.args.verbose + self.plot = self.args.plot self.commandline = " ".join(sys.argv) + self.logger.setLevel(self._log_levels[self.verbose]) if __name__ == "__main__": From 95133f75a01f5ac7a758f11904684cb145de1571 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 25 Nov 2025 14:33:57 -0800 Subject: [PATCH 101/121] Remove ending period from summary text. --- src/data/lopcToNetCDF.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/lopcToNetCDF.py b/src/data/lopcToNetCDF.py index cab8903d..75b24898 100755 --- a/src/data/lopcToNetCDF.py +++ b/src/data/lopcToNetCDF.py @@ -1895,7 +1895,7 @@ def openNetCDFFile(self, opts): # noqa: C901, PLR0912, PLR0915 " produced by the LOPC instrument. The data in this file are to be" " considered as simple time series data only and are as close to the" " original data as possible. Further processing is required to turn" - " the data into a time series of profiles." + " the data into a time series of profiles" ) self.ncFile.keywords = "plankton, particles, detritus, marine snow, particle counter" self.ncFile.Conventions = "CF-1.6" From 060a8b3a07445d2305eda42f13870b3ad6976dd7 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 25 Nov 2025 14:43:08 -0800 Subject: [PATCH 102/121] Skip metadata if running from pytest, remove summary note about further processing needed. --- src/data/align.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 3b951201..c9aadc4a 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -84,6 +84,11 @@ def global_metadata(self) -> dict: # noqa: PLR0915 """Use instance variables to return a dictionary of metadata specific for the data that are written """ + # Skip dynamic metadata during testing to ensure reproducible results + if "pytest" in sys.modules: + self.logger.debug("Skipping dynamic metadata generation (running under pytest)") + return {} + auv_name = self.auv_name mission = self.mission log_file = self.log_file @@ -141,12 +146,22 @@ def global_metadata(self) -> dict: # noqa: PLR0915 f" host {actual_hostname} using git commit {gitcommit} from" f" software at 'https://github.com/mbari-org/auv-python'" ) - metadata["summary"] = ( - "Observational oceanographic data obtained from an Autonomous" - " Underwater Vehicle mission with measurements at" - " original sampling intervals. The data have been calibrated" - " and the coordinate variables aligned using MBARI's auv-python" - " software." + metadata["summary"] = self.calibrated_nc.attrs.get( + "summary", + ( + "Observational oceanographic data obtained from an Autonomous" + " Underwater Vehicle mission with measurements at" + " original sampling intervals. The data have been calibrated" + " and the coordinate variables aligned using MBARI's auv-python" + " software." + ), + ) + # Remove note about further processing needed + metadata["summary"] = metadata["summary"].replace( + " The data in this file are to be considered as simple time series data only " + "and are as close to the original data as possible. Further processing is " + "required to turn the data into a time series of profiles.", + "", ) elif log_file: # Build title with optional deployment name From f65e6c36be6283ecccf81500b6ddb0418b6ee8bd Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 11:04:23 -0800 Subject: [PATCH 103/121] Use seg_count rather than i for the segment numbers. --- src/data/AUV.py | 4 ++-- src/data/dorado_info.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/data/AUV.py b/src/data/AUV.py index 87aa2b65..6d57ff68 100755 --- a/src/data/AUV.py +++ b/src/data/AUV.py @@ -207,10 +207,11 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 if len(segi) > MIN_SEGMENT_LENGTH: logger.info( - f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f"{seg_count:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", ) + seg_count += 1 # Start with zero adjustment at beginning and linearly ramp up to the diff at the end lon_nudge = np.interp( @@ -253,7 +254,6 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - seg_count += 1 # Any dead reckoned points after last GPS fix segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] diff --git a/src/data/dorado_info.py b/src/data/dorado_info.py index c8335eb8..0d1b30bc 100644 --- a/src/data/dorado_info.py +++ b/src/data/dorado_info.py @@ -2954,3 +2954,11 @@ " - ctdToUse = ctd1 " ), } +dorado_info["2025.316.02"] = { + "program": f"{MBTSLINE}", + "comment": ( + "Monterey Bay MBTS Mission - 31625G" + " ISUS, and LISST payloads removed, main vehicle computer NTP synced with GPS Week Rollover Bug, 1024*7*24*3600 seconds added to timestamps. " + " - ctdToUse = ctd2 " + ), +} From a9ccd0da0d93daa8bf7e8f827c2c7f2d72241e0e Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 11:05:32 -0800 Subject: [PATCH 104/121] Remove trailing period from summary, which later gets a period. --- src/data/logs2netcdfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/logs2netcdfs.py b/src/data/logs2netcdfs.py index ad5bac69..dbae3819 100755 --- a/src/data/logs2netcdfs.py +++ b/src/data/logs2netcdfs.py @@ -799,7 +799,7 @@ def _process_log_file(self, log_filename, netcdf_filename, src_dir=None): self.nc_file.summary = self.summary if self.add_seconds: self.nc_file.summary += ( - f". Corrected timeTag variables by adding {self.add_seconds} seconds. " + f". Corrected timeTag variables by adding {self.add_seconds} seconds" ) monotonic = monotonic_increasing_time_indices(self.nc_file["time"][:]) if (~monotonic).any(): From a411bd37977415b6da20ac81004478759ce345c5 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 11:06:49 -0800 Subject: [PATCH 105/121] Set instance attributes from parsed arguments, use OrderedDict for summary_fields. --- src/data/calibrate.py | 58 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 7ea7f9b2..7231ea56 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -603,10 +603,10 @@ class Calibrate_NetCDF: # noqa: PLR0913 - Many parameters needed for initialization def __init__( # noqa: PLR0913 self, - auv_name: str, - mission: str, - base_path: str, - calibration_dir: str, + auv_name: str = None, + mission: str = None, + base_path: str = None, + calibration_dir: str = None, plot: str = None, verbose: int = 0, commandline: str = "", @@ -641,6 +641,8 @@ def __init__( # noqa: PLR0913 self.noinput = noinput self.clobber = clobber self.noreprocess = noreprocess + self.nudge_segment_count = None + self.nudge_total_minutes = None self.logger.setLevel(self._log_levels[verbose]) def global_metadata(self): @@ -649,6 +651,11 @@ def global_metadata(self): """ from datetime import datetime + # Skip dynamic metadata during testing to ensure reproducible results + if "pytest" in sys.modules: + self.logger.debug("Skipping dynamic metadata generation (running under pytest)") + return {} + iso_now = datetime.now(tz=UTC).isoformat() + "Z" metadata = {} @@ -682,9 +689,19 @@ def global_metadata(self): " original sampling intervals. The data have been calibrated" " by MBARI's auv-python software." ) + # Add nudging information to summary if available + self.summary_fields[ + ( + f"{self.nudge_segment_count} underwater segments over " + f"{self.nudge_total_minutes:.1f} minutes nudged toward GPS fixes." + ) + ] = None + + # Join all summary fields into one string if self.summary_fields: - # Should be just one item in set, but just in case join them - metadata["summary"] += " " + ". ".join(self.summary_fields) + # Concatenate all summary field keys in order + metadata["summary"] += " " + ". ".join(self.summary_fields.keys()) + metadata["comment"] = ( f"MBARI Dorado-class AUV data produced from original data" f" with execution of '{self.commandline}'' at {iso_now} on" @@ -976,7 +993,7 @@ def _read_data(self, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 dictionary for hs2 data. Collect summary metadata fields that should describe the source of the data if copied from M3. """ - self.summary_fields = set() + self.summary_fields = OrderedDict() for sensor, info in self.sinfo.items(): sensor_info = SensorInfo() orig_netcdf_filename = Path(netcdfs_dir, info["data_filename"]) @@ -1026,9 +1043,8 @@ def _read_data(self, logs_dir, netcdfs_dir): # noqa: C901, PLR0912 setattr(self, sensor, sensor_info) if hasattr(sensor_info, "orig_data"): try: - self.summary_fields.add( - getattr(self, sensor).orig_data.attrs["summary"], - ) + summary_text = getattr(self, sensor).orig_data.attrs["summary"] + self.summary_fields[summary_text] = None except KeyError: self.logger.warning("%s: No summary field", orig_netcdf_filename) @@ -1725,6 +1741,14 @@ def _nudge_pos(self, max_sec_diff_at_end=10): self.segment_count = segment_count self.segment_minsum = segment_minsum + # Calculate total underwater time and store for metadata + time_coord = self.combined_nc["navigation_time"] + time_diff = time_coord.to_numpy()[-1] - time_coord.to_numpy()[0] + # Convert timedelta64 to seconds (handles nanosecond precision) + total_seconds = float(time_diff / np.timedelta64(1, "s")) + self.nudge_segment_count = segment_count + self.nudge_total_minutes = total_seconds / 60.0 + return lon_nudged, lat_nudged def _gps_process(self, sensor): @@ -3340,8 +3364,20 @@ def process_command_line(self): ) self.args = parser.parse_args() - self.logger.setLevel(self._log_levels[self.verbose]) + + # Set instance attributes from parsed arguments + self.auv_name = self.args.auv_name + self.mission = self.args.mission + self.base_path = self.args.base_path + # calibration_dir is not in args - it's set manually in __main__ or passed to __init__ + self.plot = self.args.plot + self.verbose = self.args.verbose + self.local = self.args.local + self.noinput = self.args.noinput + self.clobber = self.args.clobber + self.noreprocess = self.args.noreprocess self.commandline = " ".join(sys.argv) + self.logger.setLevel(self._log_levels[self.verbose]) if __name__ == "__main__": From 9a42001bd1fe4870c041f41d9c1e1834d776b630 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 11:07:17 -0800 Subject: [PATCH 106/121] Remove summary notes not needed after align step. --- src/data/align.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index c9aadc4a..9c373415 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -156,11 +156,17 @@ def global_metadata(self) -> dict: # noqa: PLR0915 " software." ), ) - # Remove note about further processing needed + # Remove notes not needed after align step metadata["summary"] = metadata["summary"].replace( - " The data in this file are to be considered as simple time series data only " - "and are as close to the original data as possible. Further processing is " - "required to turn the data into a time series of profiles.", + " These data have been processed from the original lopc.bin file produced by the LOPC instrument.", # noqa: E501 + "", + ) + metadata["summary"] = metadata["summary"].replace( + " The data in this file are to be considered as simple time series data only and are as close to the original data as possible.", # noqa: E501 + "", + ) + metadata["summary"] = metadata["summary"].replace( + " Further processing is required to turn the data into a time series of profiles.", "", ) elif log_file: From 445e14468b17c23bcd82296370141891800ba5d1 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 11:09:06 -0800 Subject: [PATCH 107/121] Use (simply) the summary attribute from the align file. --- src/data/resample.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/data/resample.py b/src/data/resample.py index 0b8fe3eb..08b859c5 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -176,7 +176,7 @@ def _build_global_metadata(self) -> None: ) return None - def dorado_global_metadata(self) -> dict: + def dorado_global_metadata(self) -> dict: # noqa: PLR0912 """Use instance variables to return a dictionary of metadata specific for the data that are written. Calls _build_global_metadata() first to populate common metadata. @@ -203,6 +203,8 @@ def dorado_global_metadata(self) -> dict: self.metadata["title"] += ( f"aligned, and resampled AUV sensor data from {self.auv_name} mission {self.mission}" ) + if "summary" in self.ds.attrs: + self.metadata["summary"] = self.ds.attrs["summary"] try: self.metadata["summary"] += ( f" Processing log file: {AUVCTD_OPENDAP_BASE}/surveys/" @@ -321,11 +323,6 @@ def lrauv_global_metadata(self) -> dict: if "summary" in self.ds.attrs: self.metadata["summary"] = self.ds.attrs["summary"] - else: - self.metadata["summary"] = ( - "Observational oceanographic data obtained from a Long Range Autonomous " - "Underwater Vehicle mission. Data have been aligned and resampled." - ) # Add resampling information and processing log file link to the summary self.metadata["summary"] += ( f" Data resampled to {self.freq} intervals following {self.mf_width} " From e4309b07c22a7bf4c98c1692cf57b42be56a5126 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 11:09:26 -0800 Subject: [PATCH 108/121] Testing for metadata fixups. --- .vscode/launch.json | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 2cc42ade..c28dae7e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -14,7 +14,7 @@ //"args": ["--auv_name", "i2map", "--mission", "2020.055.01", "--noinput", "--local", "-v", "2", "--clobber"] //"args": ["--auv_name", "Dorado389", "--mission", "2020.245.00", "--noinput", "-v", "2", "--portal", "http://stoqs.mbari.org:8080/auvdata/v1", "--clobber"] //"args": ["--auv_name", "Dorado389", "--mission", "2020.245.00", "--noinput", "-v"] - "args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "2", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs"] + //"args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "1", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs"] //"args": ["--auv_name", "Dorado389", "--start", "20190701", "--end", "20191230", "-v", "2"] //"args": ["--auv_name", "i2map", "--mission", "2021.062.01", "--noinput", "-v", "1"] //"args": ["--auv_name", "dorado", "--mission", "2021.109.00", "--noinput", "-v"] @@ -26,7 +26,7 @@ //"args": ["--auv_name", "dorado", "--mission", "2010.265.00", "--noinput", "-v"] //"args": ["--auv_name", "dorado", "--mission", "2023.324.00", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs"] // Mission suffering from GPS Rollover bug. Add 1024 * 7 * 24 * 3600 = 619315200 seconds - //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs", "--add_seconds", "619315200" ] + "args": ["--auv_name", "dorado", "--mission", "2025.316.02", "--noinput", "-v", "--vehicle_dir", "/Volumes/AUVCTD/missionlogs", "--add_seconds", "619315200" ] }, { "name": "1.1 - lopcToNetCDF", @@ -86,7 +86,7 @@ //"args": ["--auv_name", "dorado", "--mission", "2010.181.00", "--plot", "first1000", "-v", "1"] // OverflowError: time values outside range of 64 bit signed integers in calibrate.py:413 //"args": ["--auv_name", "dorado", "--mission", "2017.304.00", "--plot", "first1000", "-v", "1"] - "args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "1"] + //"args": ["--auv_name", "dorado", "--mission", "2017.297.00", "-v", "1"] //"args": ["--auv_name", "i2map", "--mission", "2022.094.01", "-v", "2"] //"args": ["--auv_name", "i2map", "--mission", "2018.025.00", "-v", "2"] //"args": ["--auv_name", "dorado", "--mission", "2017.248.01", "-v", "1"] @@ -105,6 +105,7 @@ //"args": ["--auv_name", "i2map", "--mission", "2018.348.01", "-v", "2"] //"args": ["--auv_name", "dorado", "--mission", "2023.324.00", "-v", "1", "--plot", "first10000"] //"args": ["--auv_name", "dorado", "--mission", "2022.201.00", "-v", "1", "--plot", "first10000"] + "args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"] }, { "name": "2.1 - Test hs2_proc.py (its unit tests)", @@ -148,7 +149,8 @@ //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2008.289.03"], //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2023.192.01"], //"args": ["-v", "1", "--auv_name", "dorado", "--mission", "2024.317.01"], - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + "args": ["-v", "1", "--auv_name", "dorado", "--mission", "2025.316.02"], }, { "name": "3.1 - align.py for LRAUV --log_file", @@ -188,7 +190,8 @@ //"args": ["--auv_name", "dorado", "--mission", "2010.341.00", "-v", "1", "--plot", "--plot_seconds", "82000"], //"args": ["--auv_name", "dorado", "--mission", "2020.337.00", "-v", "1"], //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"], - "args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] + "args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"], }, { "name": "5.0 - archive.py", @@ -306,7 +309,7 @@ //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2007", "--end_year", "2007", "--create_products", "--num_cores", "1", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2007", "--end_year", "2007", "--start_yd", "171", "--end_yd", "171", "--num_cores", "1", "--create_products", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2008.261.01", "--create_products", "--archive", "--archive_only_products"] - "args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2011", "--end_year", "2011", "--start_yd", "158", "--end_yd", "164", "--num_cores", "1"] + //"args": ["-v", "1", "--noinput", "--no_cleanup", "--start_year", "2011", "--end_year", "2011", "--start_yd", "158", "--end_yd", "164", "--num_cores", "1"] //"args": ["-v", "1", "--noinput", "--start_year", "2016", "--end_year", "2016", "--start_yd", "270", "--end_yd", "270", "--num_cores", "1", "--create_products", "--archive", "--archive_only_products"] //"args": ["-v", "1", "--noinput", "--num_cores", "1", "--mission", "2023.285.01"] //"args": ["-v", "1", "--noinput", "--no_cleanup", "--mission", "2018.079.00"] @@ -320,7 +323,7 @@ //"args": ["-v", "2", "--mission", "2004.029.03", "--noinput", "--no_cleanup"], //"args": ["-v", "1", "--mission", "2023.192.01", "--noinput", "--no_cleanup"], //"args": ["-v", "1", "--mission", "2010.151.04", "--noinput", "--no_cleanup", "--clobber"], - //"args": ["-v", "1", "--mission", "2025.316.02", "--noinput", "--no_cleanup", "--add_seconds", "619315200"], + "args": ["-v", "1", "--mission", "2025.316.02", "--noinput", "--no_cleanup", "--add_seconds", "619315200"], }, { From 9ecc3d8e1df8a5a1bf7cab0413206e058fc2a5ac Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 15:07:29 -0800 Subject: [PATCH 109/121] Update EXPECTED_SIZE_s. --- src/data/test_process_i2map.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/data/test_process_i2map.py b/src/data/test_process_i2map.py index e7a9b553..df470347 100644 --- a/src/data/test_process_i2map.py +++ b/src/data/test_process_i2map.py @@ -30,9 +30,9 @@ def test_process_i2map(complete_i2map_processing): # but it will alert us if a code change unexpectedly changes the file size. # If code changes are expected to change the file size then we should # update the expected size here. - EXPECTED_SIZE_GITHUB = 58942 - EXPECTED_SIZE_ACT = 58912 - EXPECTED_SIZE_LOCAL = 59042 + EXPECTED_SIZE_GITHUB = 52682 + EXPECTED_SIZE_ACT = 52652 + EXPECTED_SIZE_LOCAL = 52782 if str(proc.args.base_path).startswith("/home/runner"): # The size is different in GitHub Actions, maybe due to different metadata assert nc_file.stat().st_size == EXPECTED_SIZE_GITHUB # noqa: S101 From 3395e38a0653874661c2294dace86f664722e2e8 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 15:22:30 -0800 Subject: [PATCH 110/121] Test for LRAUV processing. Claude wrote some mocked data fixtures so it doesn't do the complete data processing pipeline the way the other tests do. --- src/data/test_process_lrauv.py | 145 +++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 src/data/test_process_lrauv.py diff --git a/src/data/test_process_lrauv.py b/src/data/test_process_lrauv.py new file mode 100644 index 00000000..bf30d667 --- /dev/null +++ b/src/data/test_process_lrauv.py @@ -0,0 +1,145 @@ +# noqa: INP001 + +import numpy as np +import pytest +import xarray as xr + +# The test should not take more than 5 minutes to run +MAX_SECS = 5 * 60 # 5 minutes + +# Test configuration for LRAUV processing with start/end dates +TEST_LRAUV_VEHICLE = "tethys" +TEST_START = "20120909T000000" +TEST_END = "20120910T000000" + + +@pytest.fixture(scope="session") +def mock_lrauv_data(tmp_path_factory): + """Create mock LRAUV data structure for testing.""" + base_path = tmp_path_factory.mktemp("lrauv_test") + vehicle_dir = base_path / TEST_LRAUV_VEHICLE + mission_year_dir = vehicle_dir / "missionlogs/2012" + mission_dir = mission_year_dir / "20120908_20120920" + + # Create .dlist file in the year directory (great-grandparent of log files) + # The filename should match the deployment directory name + dlist_file = mission_year_dir / "20120908_20120920.dlist" + dlist_file.parent.mkdir(parents=True, exist_ok=True) + dlist_file.write_text("# Deployment name: CANON_september2012\nSome other info\n") + + # Create two log file directories + log_dirs = [ + mission_dir / "20120909T010636", + mission_dir / "20120909T152301", + ] + + log_file_stems = [ + "201209090106_201209091521", + "201209091523_201209101900", + ] + + for log_dir, stem in zip(log_dirs, log_file_stems): # noqa: B905 + log_dir.mkdir(parents=True, exist_ok=True) + + # Create minimal Group files with realistic LRAUV structure + time_vals = np.arange( + np.datetime64("2012-09-09T01:00:00"), + np.datetime64("2012-09-09T15:00:00"), + np.timedelta64(1, "s"), + ) + + # Create a few Group files + for group_name in ["navigation", "ctd1", "oxygen"]: + ds = xr.Dataset( + { + f"{group_name}_latitude": (["time"], np.full(len(time_vals), 36.8)), + f"{group_name}_longitude": (["time"], np.full(len(time_vals), -121.8)), + f"{group_name}_depth": (["time"], np.random.uniform(0, 50, len(time_vals))), + }, + coords={"time": time_vals}, + ) + ds.to_netcdf(log_dir / f"{stem}_Group_{group_name}.nc") + + return base_path + + +@pytest.fixture(scope="session", autouse=False) +def complete_lrauv_processing(mock_lrauv_data): + """Process LRAUV data using start/end date range with mocked data.""" + # For now, just return the mock data path - full processing integration + # would require mocking the entire pipeline which is complex. + # Instead, we'll test individual components with the mocked data. + return mock_lrauv_data + + +def test_lrauv_mock_data_structure(complete_lrauv_processing): + """Test that mock LRAUV data structure is created correctly.""" + base_path = complete_lrauv_processing + + # Check that Group files were created for the first log file + log_file_stem = "201209090106_201209091521" + netcdfs_dir = ( + base_path / TEST_LRAUV_VEHICLE / "missionlogs/2012/20120908_20120920/20120909T010636" + ) + + # Check for Group files + group_files = list(netcdfs_dir.glob(f"{log_file_stem}_Group_*.nc")) + assert len(group_files) == 3, "Expected 3 Group files" # noqa: PLR2004, S101 + + # Check that Group files contain expected variables + for group_file in group_files: + ds = xr.open_dataset(group_file) + assert "time" in ds.coords # noqa: S101 + assert len(ds.dims) > 0 # noqa: S101 + ds.close() + + +def test_lrauv_deployment_name_parsing(complete_lrauv_processing): + """Test that deployment name can be parsed from .dlist file.""" + from utils import get_deployment_name + + base_path = complete_lrauv_processing + # Construct path to any log file in the structure + log_file = ( + base_path + / TEST_LRAUV_VEHICLE + / "missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4" + ) + + # The .dlist file should exist in the year directory + dlist_file = base_path / TEST_LRAUV_VEHICLE / "missionlogs/2012/20120908_20120920.dlist" + assert dlist_file.exists(), f".dlist file not found at {dlist_file}" # noqa: S101 + + # Test deployment name extraction + deployment_name = get_deployment_name(str(log_file), str(base_path)) + assert deployment_name == "CANON_september2012" # noqa: S101 + + +def test_lrauv_group_file_structure(complete_lrauv_processing): + """Test that Group files have correct LRAUV structure.""" + base_path = complete_lrauv_processing + + log_file_stem = "201209090106_201209091521" + netcdfs_dir = ( + base_path / TEST_LRAUV_VEHICLE / "missionlogs/2012/20120908_20120920/20120909T010636" + ) + + # Check navigation Group file + nav_file = netcdfs_dir / f"{log_file_stem}_Group_navigation.nc" + assert nav_file.exists() # noqa: S101 + + ds = xr.open_dataset(nav_file) + # Check for expected coordinate variables + assert "navigation_latitude" in ds.variables # noqa: S101 + assert "navigation_longitude" in ds.variables # noqa: S101 + assert "navigation_depth" in ds.variables # noqa: S101 + assert "time" in ds.coords # noqa: S101 + ds.close() + + +@pytest.mark.skip(reason="Full integration test - requires all processing modules") +def test_lrauv_full_pipeline(complete_lrauv_processing): + """Test full LRAUV processing pipeline from logs to resampled data.""" + # This would test the full pipeline but requires significant mocking + # of calibration files, configuration, etc. + pass # noqa: PIE790 From f123175dc056cd5294ac81e0149bdbb433a0ef99 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 15:37:15 -0800 Subject: [PATCH 111/121] Refactor: Consolidate AUV utilities into utils.py Move utility functions from AUV.py to utils.py to improve code organization and eliminate the need for linting exceptions. Changes: - Move monotonic_increasing_time_indices() to utils.py - Move nudge_positions() and _create_nudge_plots() to utils.py - Update imports in calibrate.py, combine.py, and logs2netcdfs.py - Remove AUV.py module (no longer needed) - Enhance utils.py module docstring with comprehensive overview Benefits: - Cleaner codebase with better utility function grouping - No more linting exceptions needed for AUV.py - All utility functions now in a single, well-documented location - Maintains all existing functionality with zero behavior changes --- src/data/AUV.py | 389 ------------------------------------ src/data/calibrate.py | 2 +- src/data/combine.py | 2 +- src/data/logs2netcdfs.py | 2 +- src/data/utils.py | 411 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 403 insertions(+), 403 deletions(-) delete mode 100755 src/data/AUV.py diff --git a/src/data/AUV.py b/src/data/AUV.py deleted file mode 100755 index 6d57ff68..00000000 --- a/src/data/AUV.py +++ /dev/null @@ -1,389 +0,0 @@ -#!/usr/bin/env python -""" -Base classes and utility functions for reading and writing data for MBARI's -Dorado class AUVs. - --- -Mike McCann -MBARI 30 March 2020 -""" - -import logging -from datetime import datetime - -import cf_xarray # Needed for the .cf accessor # noqa: F401 -import numpy as np -import xarray as xr - - -def monotonic_increasing_time_indices(time_array: np.array) -> np.ndarray: - monotonic = [] - last_t = 0.0 if isinstance(time_array[0], np.float64) else datetime.min # noqa: DTZ901 - for t in time_array: - if t > last_t: - monotonic.append(True) - last_t = t - else: - monotonic.append(False) - return np.array(monotonic) - - -def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 - nav_longitude: xr.DataArray, - nav_latitude: xr.DataArray, - gps_longitude: xr.DataArray, - gps_latitude: xr.DataArray, - logger: logging.Logger, - auv_name: str = "", - mission: str = "", - max_sec_diff_at_end: int = 10, - log_file: str = "", - create_plots: bool = False, # noqa: FBT001, FBT002 -) -> tuple[xr.DataArray, xr.DataArray, int, float]: - """ - Apply linear nudges to underwater latitudes and longitudes so that - they match the surface GPS positions. - - Parameters: - ----------- - nav_longitude : xr.DataArray - Navigation longitude data (dead reckoned) - nav_latitude : xr.DataArray - Navigation latitude data (dead reckoned) - gps_longitude : xr.DataArray - GPS longitude fixes - gps_latitude : xr.DataArray - GPS latitude fixes - logger : logging.Logger - Logger for output messages - auv_name : str, optional - AUV name for plot titles - mission : str, optional - Mission name for plot titles - max_sec_diff_at_end : int, optional - Maximum allowable time difference at segment end (default: 10) - create_plots : bool, optional - Whether to create debug plots (default: False) - - Returns: - -------- - tuple[xr.DataArray, xr.DataArray, int, float] - nudged_longitude, nudged_latitude, segment_count, segment_minsum - """ - segment_count = None - segment_minsum = None - - lon = nav_longitude - lat = nav_latitude - lon_fix = gps_longitude - lat_fix = gps_latitude - - logger.info( - f"{'seg#':5s} {'end_sec_diff':12s} {'end_lon_diff':12s} {'end_lat_diff':12s}" # noqa: G004 - f" {'len(segi)':9s} {'seg_min':>9s} {'u_drift (cm/s)':14s} {'v_drift (cm/s)':14s}" - f" {'start datetime of segment':>29}", - ) - - # Any dead reckoned points before first GPS fix - usually empty - # as GPS fix happens before dive - segi = np.where(lat.cf["T"].data < lat_fix.cf["T"].data[0])[0] - if lon[:][segi].any(): - lon_nudged_array = lon[segi] - lat_nudged_array = lat[segi] - dt_nudged = lon.cf["T"][segi] - logger.debug( - "Filled _nudged arrays with %d values starting at %s " - "which were before the first GPS fix at %s", - len(segi), - lat.cf["T"].data[0], - lat_fix.cf["T"].data[0], - ) - else: - lon_nudged_array = np.array([]) - lat_nudged_array = np.array([]) - dt_nudged = np.array([], dtype="datetime64[ns]") - if segi.any(): - # Return difference of numpy timestamps in units of minutes - seg_min = (lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]).astype( - "timedelta64[s]" - ).astype(float) / 60.0 - else: - seg_min = 0 - logger.info( - f"{' ':5} {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14} {'-':>29}", # noqa: E501, G004 - ) - - MIN_SEGMENT_LENGTH = 10 - seg_count = 0 - seg_minsum = 0 - error_message = "" - for i in range(len(lat_fix) - 1): - # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes - segi = np.where( - np.logical_and( - lat.cf["T"].data > lat_fix.cf["T"].data[i], - lat.cf["T"].data < lat_fix.cf["T"].data[i + 1], - ), - )[0] - if not segi.any(): - logger.debug( - f"No dead reckoned values found between GPS times of " # noqa: G004 - f"{lat_fix.cf['T'].data[i]} and {lat_fix.cf['T'].data[i + 1]}", - ) - continue - - end_sec_diff = float(lat_fix.cf["T"].data[i + 1] - lat.cf["T"].data[segi[-1]]) / 1.0e9 - - end_lon_diff = float(lon_fix[i + 1]) - float(lon[segi[-1]]) - end_lat_diff = float(lat_fix[i + 1]) - float(lat[segi[-1]]) - - # Compute approximate horizontal drift rate as a sanity check - try: - u_drift = ( - end_lon_diff - * float(np.cos(lat_fix[i + 1] * np.pi / 180)) - * 60 - * 185300 - / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) - ) - except ZeroDivisionError: - u_drift = 0 - try: - v_drift = ( - end_lat_diff - * 60 - * 185300 - / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) - ) - except ZeroDivisionError: - v_drift = 0 - - if abs(end_lon_diff) > 1 or abs(end_lat_diff) > 1: - # Error handling - same as original - logger.info( - f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 - f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" - f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", - ) - logger.error( - "End of underwater segment dead reckoned position is too different " - "from GPS fix: abs(end_lon_diff) (%s) > 1 or abs(end_lat_diff) (%s) > 1", - end_lon_diff, - end_lat_diff, - ) - if log_file: - logger.info( - "Fix this error by calling _range_qc_combined_nc() in " - "_navigation_process() and/or _gps_process() for %s", - log_file, - ) - logger.info("Run to get a plot: combine.py -v 1 --plot --log_file %s", log_file) - elif auv_name and mission: - logger.info( - "Fix this error by calling _range_qc_combined_nc() in " - "_navigation_process() and/or _gps_process() for %s %s", - auv_name, - mission, - ) - error_message = ( - f"abs(end_lon_diff) ({end_lon_diff}) > 1 or abs(end_lat_diff) ({end_lat_diff}) > 1" - ) - if abs(end_sec_diff) > max_sec_diff_at_end: - logger.warning( - "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", - end_sec_diff, - max_sec_diff_at_end, - ) - logger.info( - "Overriding end_lon_diff (%s) and end_lat_diff (%s) by setting them to 0", - end_lon_diff, - end_lat_diff, - ) - end_lon_diff = 0 - end_lat_diff = 0 - - seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 - seg_minsum += seg_min - - if len(segi) > MIN_SEGMENT_LENGTH: - logger.info( - f"{seg_count:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 - f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" - f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", - ) - seg_count += 1 - - # Start with zero adjustment at beginning and linearly ramp up to the diff at the end - lon_nudge = np.interp( - lon.cf["T"].data[segi].astype(np.int64), - [ - lon.cf["T"].data[segi].astype(np.int64)[0], - lon.cf["T"].data[segi].astype(np.int64)[-1], - ], - [0, end_lon_diff], - ) - lat_nudge = np.interp( - lat.cf["T"].data[segi].astype(np.int64), - [ - lat.cf["T"].data[segi].astype(np.int64)[0], - lat.cf["T"].data[segi].astype(np.int64)[-1], - ], - [0, end_lat_diff], - ) - - # Sanity checks - MAX_LONGITUDE = 180 - MAX_LATITUDE = 90 - if ( - np.max(np.abs(lon[segi] + lon_nudge)) > MAX_LONGITUDE - or np.max(np.abs(lat[segi] + lon_nudge)) > MAX_LATITUDE - ): - logger.warning( - "Nudged coordinate is way out of reasonable range - segment %d", - seg_count, - ) - logger.warning( - " max(abs(lon)) = %s", - np.max(np.abs(lon[segi] + lon_nudge)), - ) - logger.warning( - " max(abs(lat)) = %s", - np.max(np.abs(lat[segi] + lat_nudge)), - ) - - lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) - lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) - dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - - # Any dead reckoned points after last GPS fix - segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] - seg_min = 0 - if segi.any(): - lon_nudged_array = np.append(lon_nudged_array, lon[segi]) - lat_nudged_array = np.append(lat_nudged_array, lat[segi]) - dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) - seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 - - logger.info( - f"{seg_count + 1:5d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 - ) - segment_count = seg_count - segment_minsum = seg_minsum - - logger.info("Points in final series = %d", len(dt_nudged)) - - lon_nudged = xr.DataArray( - data=lon_nudged_array, - dims=["time"], - coords={"time": dt_nudged}, - name="longitude", - ) - lat_nudged = xr.DataArray( - data=lat_nudged_array, - dims=["time"], - coords={"time": dt_nudged}, - name="latitude", - ) - - # Optional plotting code - raise error after opportunity to plot - if create_plots: - _create_nudge_plots( - lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger - ) - - if error_message: - logger.error("Nudge positions error: %s", error_message) - raise ValueError(error_message) - - return lon_nudged, lat_nudged, segment_count, segment_minsum - - -def _create_nudge_plots( # noqa: PLR0913 - lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger -): - """Create debug plots for position nudging (separated for clarity).""" - try: - import matplotlib.pyplot as plt - - try: - import cartopy.crs as ccrs # type: ignore # noqa: I001, PGH003 - from matplotlib import patches - from shapely.geometry import LineString # type: ignore # noqa: PGH003 - - has_cartopy = True - except ImportError: - has_cartopy = False - - # Time series plots - fig, axes = plt.subplots(nrows=2, figsize=(18, 6)) - axes[0].plot(lat_nudged.coords["time"].data, lat_nudged, "-") - axes[0].plot(lat.cf["T"].data, lat, "--") - axes[0].plot(lat_fix.cf["T"].data, lat_fix, "*") - axes[0].set_ylabel("Latitude") - axes[0].legend(["Nudged", "Original", "GPS Fixes"]) - axes[1].plot(lon_nudged.coords["time"].data, lon_nudged, "-") - axes[1].plot(lon.cf["T"].data, lon, "--") - axes[1].plot(lon_fix.cf["T"].data, lon_fix, "*") - axes[1].set_ylabel("Longitude") - axes[1].legend(["Nudged", "Original", "GPS Fixes"]) - title = "Corrected nav from nudge_positions()" - fig.suptitle(title) - axes[0].grid() - axes[1].grid() - logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) - plt.show() - - # Map plot - if has_cartopy: - ax = plt.axes(projection=ccrs.PlateCarree()) - nudged = LineString(zip(lon_nudged.to_numpy(), lat_nudged.to_numpy(), strict=False)) - original = LineString(zip(lon.to_numpy(), lat.to_numpy(), strict=False)) - ax.add_geometries( - [nudged], - crs=ccrs.PlateCarree(), - edgecolor="red", - facecolor="none", - label="Nudged", - ) - ax.add_geometries( - [original], - crs=ccrs.PlateCarree(), - edgecolor="grey", - facecolor="none", - label="Original", - ) - handle_gps = ax.scatter( - lon_fix.to_numpy(), - lat_fix.to_numpy(), - color="green", - label="GPS Fixes", - ) - bounds = nudged.buffer(0.02).bounds - extent = bounds[0], bounds[2], bounds[1], bounds[3] - ax.set_extent(extent, crs=ccrs.PlateCarree()) - ax.coastlines() - - handle_nudged = patches.Rectangle((0, 0), 1, 0.1, facecolor="red") - handle_original = patches.Rectangle((0, 0), 1, 0.1, facecolor="gray") - ax.legend( - [handle_nudged, handle_original, handle_gps], - ["Nudged", "Original", "GPS Fixes"], - ) - ax.gridlines( - crs=ccrs.PlateCarree(), - draw_labels=True, - linewidth=1, - color="gray", - alpha=0.5, - ) - ax.set_title(f"{auv_name} {mission}") - logger.debug( - "Pausing map plot (doesn't work well in VS Code debugger)." - " Close window to continue.", - ) - plt.show() - else: - logger.warning("No map plot, could not import cartopy") - - except ImportError: - logger.warning("Could not create plots - matplotlib not available") diff --git a/src/data/calibrate.py b/src/data/calibrate.py index 7231ea56..36c472b1 100755 --- a/src/data/calibrate.py +++ b/src/data/calibrate.py @@ -50,7 +50,7 @@ from scipy import signal from scipy.interpolate import interp1d -from AUV import monotonic_increasing_time_indices, nudge_positions +from utils import monotonic_increasing_time_indices, nudge_positions from common_args import get_standard_dorado_parser from hs2_proc import compute_backscatter, hs2_calc_bb, hs2_read_cal_file from logs2netcdfs import AUV_NetCDF, MISSIONLOGS, MISSIONNETCDFS, TIME, TIME60HZ diff --git a/src/data/combine.py b/src/data/combine.py index 5ec13ed5..b7c0c798 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -51,7 +51,7 @@ import numpy as np import pandas as pd import xarray as xr -from AUV import monotonic_increasing_time_indices, nudge_positions +from utils import monotonic_increasing_time_indices, nudge_positions from common_args import get_standard_lrauv_parser from logs2netcdfs import AUV_NetCDF, TIME, TIME60HZ from nc42netcdfs import BASE_LRAUV_PATH, GROUP diff --git a/src/data/logs2netcdfs.py b/src/data/logs2netcdfs.py index dbae3819..d49b873e 100755 --- a/src/data/logs2netcdfs.py +++ b/src/data/logs2netcdfs.py @@ -29,7 +29,7 @@ from aiohttp.client_exceptions import ClientConnectorError from netCDF4 import Dataset -from AUV import monotonic_increasing_time_indices +from utils import monotonic_increasing_time_indices from common_args import get_standard_dorado_parser from readauvlog import log_record diff --git a/src/data/utils.py b/src/data/utils.py index c55783eb..bab58af3 100644 --- a/src/data/utils.py +++ b/src/data/utils.py @@ -1,17 +1,20 @@ # noqa: INP001 +""" +Utility functions for MBARI AUV data processing. -# pure-Python Douglas-Peucker line simplification/generalization -# -# this code was written by Schuyler Erle and is -# made available in the public domain. -# -# the code was ported from a freely-licensed example at -# http://www.3dsoftware.com/Cartography/Programming/PolyLineReduction/ -# -# the original page is no longer available, but is mirrored at -# http://www.mappinghacks.com/code/PolyLineReduction/ +Includes: +- Douglas-Peucker line simplification (pure-Python implementation) +- LRAUV deployment name parsing +- Time series monotonicity checking +- Position nudging for dead reckoning correction -""" +The Douglas-Peucker code was written by Schuyler Erle and is +made available in the public domain. It was ported from a freely-licensed example at +http://www.3dsoftware.com/Cartography/Programming/PolyLineReduction/ +(original page no longer available, but mirrored at +http://www.mappinghacks.com/code/PolyLineReduction/) + +Example usage of simplify_points: >>> line = [(0,0),(1,0),(2,0),(2,1),(2,2),(1,2),(0,2),(0,1),(0,0)] >>> simplify_points(line, 1.0) @@ -25,8 +28,13 @@ import logging import math +from datetime import datetime from pathlib import Path +import cf_xarray # Needed for the .cf accessor # noqa: F401 +import numpy as np +import xarray as xr + def get_deployment_name( log_file: str, base_lrauv_path: Path, logger: logging.Logger = None @@ -78,6 +86,387 @@ def get_deployment_name( return None +def monotonic_increasing_time_indices(time_array: np.array) -> np.ndarray: + """Check which elements in a time array are monotonically increasing. + + Args: + time_array: Array of time values (datetime or float) + + Returns: + Boolean array indicating which elements maintain monotonic increase + """ + monotonic = [] + last_t = 0.0 if isinstance(time_array[0], np.float64) else datetime.min # noqa: DTZ901 + for t in time_array: + if t > last_t: + monotonic.append(True) + last_t = t + else: + monotonic.append(False) + return np.array(monotonic) + + +def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 + nav_longitude: xr.DataArray, + nav_latitude: xr.DataArray, + gps_longitude: xr.DataArray, + gps_latitude: xr.DataArray, + logger: logging.Logger, + auv_name: str = "", + mission: str = "", + max_sec_diff_at_end: int = 10, + log_file: str = "", + create_plots: bool = False, # noqa: FBT001, FBT002 +) -> tuple[xr.DataArray, xr.DataArray, int, float]: + """ + Apply linear nudges to underwater latitudes and longitudes so that + they match the surface GPS positions. + + Parameters: + ----------- + nav_longitude : xr.DataArray + Navigation longitude data (dead reckoned) + nav_latitude : xr.DataArray + Navigation latitude data (dead reckoned) + gps_longitude : xr.DataArray + GPS longitude fixes + gps_latitude : xr.DataArray + GPS latitude fixes + logger : logging.Logger + Logger for output messages + auv_name : str, optional + AUV name for plot titles + mission : str, optional + Mission name for plot titles + max_sec_diff_at_end : int, optional + Maximum allowable time difference at segment end (default: 10) + create_plots : bool, optional + Whether to create debug plots (default: False) + + Returns: + -------- + tuple[xr.DataArray, xr.DataArray, int, float] + nudged_longitude, nudged_latitude, segment_count, segment_minsum + """ + segment_count = None + segment_minsum = None + + lon = nav_longitude + lat = nav_latitude + lon_fix = gps_longitude + lat_fix = gps_latitude + + logger.info( + f"{'seg#':5s} {'end_sec_diff':12s} {'end_lon_diff':12s} {'end_lat_diff':12s}" # noqa: G004 + f" {'len(segi)':9s} {'seg_min':>9s} {'u_drift (cm/s)':14s} {'v_drift (cm/s)':14s}" + f" {'start datetime of segment':>29}", + ) + + # Any dead reckoned points before first GPS fix - usually empty + # as GPS fix happens before dive + segi = np.where(lat.cf["T"].data < lat_fix.cf["T"].data[0])[0] + if lon[:][segi].any(): + lon_nudged_array = lon[segi] + lat_nudged_array = lat[segi] + dt_nudged = lon.cf["T"][segi] + logger.debug( + "Filled _nudged arrays with %d values starting at %s " + "which were before the first GPS fix at %s", + len(segi), + lat.cf["T"].data[0], + lat_fix.cf["T"].data[0], + ) + else: + lon_nudged_array = np.array([]) + lat_nudged_array = np.array([]) + dt_nudged = np.array([], dtype="datetime64[ns]") + if segi.any(): + # Return difference of numpy timestamps in units of minutes + seg_min = (lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]).astype( + "timedelta64[s]" + ).astype(float) / 60.0 + else: + seg_min = 0 + logger.info( + f"{' ':5} {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14} {'-':>29}", # noqa: E501, G004 + ) + + MIN_SEGMENT_LENGTH = 10 + seg_count = 0 + seg_minsum = 0 + error_message = "" + for i in range(len(lat_fix) - 1): + # Segment of dead reckoned (under water) positions, each surrounded by GPS fixes + segi = np.where( + np.logical_and( + lat.cf["T"].data > lat_fix.cf["T"].data[i], + lat.cf["T"].data < lat_fix.cf["T"].data[i + 1], + ), + )[0] + if not segi.any(): + logger.debug( + f"No dead reckoned values found between GPS times of " # noqa: G004 + f"{lat_fix.cf['T'].data[i]} and {lat_fix.cf['T'].data[i + 1]}", + ) + continue + + end_sec_diff = float(lat_fix.cf["T"].data[i + 1] - lat.cf["T"].data[segi[-1]]) / 1.0e9 + + end_lon_diff = float(lon_fix[i + 1]) - float(lon[segi[-1]]) + end_lat_diff = float(lat_fix[i + 1]) - float(lat[segi[-1]]) + + # Compute approximate horizontal drift rate as a sanity check + try: + u_drift = ( + end_lon_diff + * float(np.cos(lat_fix[i + 1] * np.pi / 180)) + * 60 + * 185300 + / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) + ) + except ZeroDivisionError: + u_drift = 0 + try: + v_drift = ( + end_lat_diff + * 60 + * 185300 + / (float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9) + ) + except ZeroDivisionError: + v_drift = 0 + + if abs(end_lon_diff) > 1 or abs(end_lat_diff) > 1: + # Error handling - same as original + logger.info( + f"{i:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" + f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", + ) + logger.error( + "End of underwater segment dead reckoned position is too different " + "from GPS fix: abs(end_lon_diff) (%s) > 1 or abs(end_lat_diff) (%s) > 1", + end_lon_diff, + end_lat_diff, + ) + if log_file: + logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s", + log_file, + ) + logger.info("Run to get a plot: combine.py -v 1 --plot --log_file %s", log_file) + elif auv_name and mission: + logger.info( + "Fix this error by calling _range_qc_combined_nc() in " + "_navigation_process() and/or _gps_process() for %s %s", + auv_name, + mission, + ) + error_message = ( + f"abs(end_lon_diff) ({end_lon_diff}) > 1 or abs(end_lat_diff) ({end_lat_diff}) > 1" + ) + if abs(end_sec_diff) > max_sec_diff_at_end: + logger.warning( + "abs(end_sec_diff) (%s) > max_sec_diff_at_end (%s)", + end_sec_diff, + max_sec_diff_at_end, + ) + logger.info( + "Overriding end_lon_diff (%s) and end_lat_diff (%s) by setting them to 0", + end_lon_diff, + end_lat_diff, + ) + end_lon_diff = 0 + end_lat_diff = 0 + + seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 + seg_minsum += seg_min + + if len(segi) > MIN_SEGMENT_LENGTH: + logger.info( + f"{seg_count:5d}: {end_sec_diff:12.3f} {end_lon_diff:12.7f}" # noqa: G004 + f" {end_lat_diff:12.7f} {len(segi):-9d} {seg_min:9.2f}" + f" {u_drift:14.3f} {v_drift:14.3f} {lat.cf['T'].data[segi][-1]}", + ) + seg_count += 1 + + # Start with zero adjustment at beginning and linearly ramp up to the diff at the end + lon_nudge = np.interp( + lon.cf["T"].data[segi].astype(np.int64), + [ + lon.cf["T"].data[segi].astype(np.int64)[0], + lon.cf["T"].data[segi].astype(np.int64)[-1], + ], + [0, end_lon_diff], + ) + lat_nudge = np.interp( + lat.cf["T"].data[segi].astype(np.int64), + [ + lat.cf["T"].data[segi].astype(np.int64)[0], + lat.cf["T"].data[segi].astype(np.int64)[-1], + ], + [0, end_lat_diff], + ) + + # Sanity checks + MAX_LONGITUDE = 180 + MAX_LATITUDE = 90 + if ( + np.max(np.abs(lon[segi] + lon_nudge)) > MAX_LONGITUDE + or np.max(np.abs(lat[segi] + lon_nudge)) > MAX_LATITUDE + ): + logger.warning( + "Nudged coordinate is way out of reasonable range - segment %d", + seg_count, + ) + logger.warning( + " max(abs(lon)) = %s", + np.max(np.abs(lon[segi] + lon_nudge)), + ) + logger.warning( + " max(abs(lat)) = %s", + np.max(np.abs(lat[segi] + lat_nudge)), + ) + + lon_nudged_array = np.append(lon_nudged_array, lon[segi] + lon_nudge) + lat_nudged_array = np.append(lat_nudged_array, lat[segi] + lat_nudge) + dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) + + # Any dead reckoned points after last GPS fix + segi = np.where(lat.cf["T"].data > lat_fix.cf["T"].data[-1])[0] + seg_min = 0 + if segi.any(): + lon_nudged_array = np.append(lon_nudged_array, lon[segi]) + lat_nudged_array = np.append(lat_nudged_array, lat[segi]) + dt_nudged = np.append(dt_nudged, lon.cf["T"].data[segi]) + seg_min = float(lat.cf["T"].data[segi][-1] - lat.cf["T"].data[segi][0]) / 1.0e9 / 60 + + logger.info( + f"{seg_count + 1:5d}: {'-':>12} {'-':>12} {'-':>12} {len(segi):-9d} {seg_min:9.2f} {'-':>14} {'-':>14}", # noqa: E501, G004 + ) + segment_count = seg_count + segment_minsum = seg_minsum + + logger.info("Points in final series = %d", len(dt_nudged)) + + lon_nudged = xr.DataArray( + data=lon_nudged_array, + dims=["time"], + coords={"time": dt_nudged}, + name="longitude", + ) + lat_nudged = xr.DataArray( + data=lat_nudged_array, + dims=["time"], + coords={"time": dt_nudged}, + name="latitude", + ) + + # Optional plotting code - raise error after opportunity to plot + if create_plots: + _create_nudge_plots( + lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger + ) + + if error_message: + logger.error("Nudge positions error: %s", error_message) + raise ValueError(error_message) + + return lon_nudged, lat_nudged, segment_count, segment_minsum + + +def _create_nudge_plots( # noqa: PLR0913 + lat, lon, lat_fix, lon_fix, lat_nudged, lon_nudged, auv_name, mission, logger +): + """Create debug plots for position nudging (separated for clarity).""" + try: + import matplotlib.pyplot as plt + + try: + import cartopy.crs as ccrs # type: ignore # noqa: I001, PGH003 + from matplotlib import patches + from shapely.geometry import LineString # type: ignore # noqa: PGH003 + + has_cartopy = True + except ImportError: + has_cartopy = False + + # Time series plots + fig, axes = plt.subplots(nrows=2, figsize=(18, 6)) + axes[0].plot(lat_nudged.coords["time"].data, lat_nudged, "-") + axes[0].plot(lat.cf["T"].data, lat, "--") + axes[0].plot(lat_fix.cf["T"].data, lat_fix, "*") + axes[0].set_ylabel("Latitude") + axes[0].legend(["Nudged", "Original", "GPS Fixes"]) + axes[1].plot(lon_nudged.coords["time"].data, lon_nudged, "-") + axes[1].plot(lon.cf["T"].data, lon, "--") + axes[1].plot(lon_fix.cf["T"].data, lon_fix, "*") + axes[1].set_ylabel("Longitude") + axes[1].legend(["Nudged", "Original", "GPS Fixes"]) + title = "Corrected nav from nudge_positions()" + fig.suptitle(title) + axes[0].grid() + axes[1].grid() + logger.debug("Pausing with plot entitled: %s. Close window to continue.", title) + plt.show() + + # Map plot + if has_cartopy: + ax = plt.axes(projection=ccrs.PlateCarree()) + nudged = LineString(zip(lon_nudged.to_numpy(), lat_nudged.to_numpy(), strict=False)) + original = LineString(zip(lon.to_numpy(), lat.to_numpy(), strict=False)) + ax.add_geometries( + [nudged], + crs=ccrs.PlateCarree(), + edgecolor="red", + facecolor="none", + label="Nudged", + ) + ax.add_geometries( + [original], + crs=ccrs.PlateCarree(), + edgecolor="grey", + facecolor="none", + label="Original", + ) + handle_gps = ax.scatter( + lon_fix.to_numpy(), + lat_fix.to_numpy(), + color="green", + label="GPS Fixes", + ) + bounds = nudged.buffer(0.02).bounds + extent = bounds[0], bounds[2], bounds[1], bounds[3] + ax.set_extent(extent, crs=ccrs.PlateCarree()) + ax.coastlines() + + handle_nudged = patches.Rectangle((0, 0), 1, 0.1, facecolor="red") + handle_original = patches.Rectangle((0, 0), 1, 0.1, facecolor="gray") + ax.legend( + [handle_nudged, handle_original, handle_gps], + ["Nudged", "Original", "GPS Fixes"], + ) + ax.gridlines( + crs=ccrs.PlateCarree(), + draw_labels=True, + linewidth=1, + color="gray", + alpha=0.5, + ) + ax.set_title(f"{auv_name} {mission}") + logger.debug( + "Pausing map plot (doesn't work well in VS Code debugger)." + " Close window to continue.", + ) + plt.show() + else: + logger.warning("No map plot, could not import cartopy") + + except ImportError: + logger.warning("Could not create plots - matplotlib not available") + + def simplify_points(pts, tolerance): anchor = 0 floater = len(pts) - 1 From 1fd7ffc36b1aeea1c41958448e097ae723785362 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 16:41:26 -0800 Subject: [PATCH 112/121] Ensure GPS fixes have monotonically increasing timestamps. Testing with tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4 revealed this error: ValueError: conflicting sizes for dimension 'nudged_time': length 63728 on the data but length 63611 on coordinate 'nudged_time' which I gave to Claude. I told it to keep digging on the problem and it eventually found the initial cause: There it is! GPS fix 0 has a timestamp after GPS fix 1! The first two GPS fixes are out of order: GPS fix 0: 2012-09-17T03:05:44 GPS fix 1: 2012-09-17T03:04:52 This is why we now have this commit. --- .vscode/launch.json | 8 ++++---- src/data/combine.py | 23 +++++++++++++++++++++-- src/data/utils.py | 1 + 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index c28dae7e..11d94f1e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -123,10 +123,10 @@ "console": "integratedTerminal", "justMyCode": false, //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] // Conflicting sizes for nudged_time and data - //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"] + "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"] }, { @@ -345,11 +345,11 @@ //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber"] //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4", "--clobber", "--no_cleanup"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup"] - "args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] + //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] // Has different universals time coodinates for longitude/latitude and depth //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] // Conflicting sizes for nudged_time and data - //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--no_cleanup" + "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--no_cleanup" //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // No nudged latitude and longitude variables //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"] diff --git a/src/data/combine.py b/src/data/combine.py index b7c0c798..9fb05d81 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -659,12 +659,31 @@ def _initial_coordinate_qc(self) -> None: def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: """Add nudged longitude and latitude variables to the combined dataset.""" self._initial_coordinate_qc() + + # Ensure GPS fixes have monotonically increasing timestamps + gps_lon = self.combined_nc["nal9602_longitude_fix"] + gps_lat = self.combined_nc["nal9602_latitude_fix"] + gps_time_coord = gps_lon.coords[gps_lon.dims[0]] + + # Convert to pandas index which handles datetime comparisons properly + gps_time_index = gps_time_coord.to_index() + gps_monotonic = monotonic_increasing_time_indices(gps_time_index) + if not np.all(gps_monotonic): + monotonic_count = np.sum(gps_monotonic) + self.logger.warning( + "Filtered GPS fixes from %d to %d to ensure monotonically increasing timestamps", + len(gps_lon), + monotonic_count, + ) + gps_lon = gps_lon.isel({gps_lon.dims[0]: gps_monotonic}) + gps_lat = gps_lat.isel({gps_lat.dims[0]: gps_monotonic}) + try: nudged_longitude, nudged_latitude, segment_count, segment_minsum = nudge_positions( nav_longitude=self.combined_nc["universals_longitude"], nav_latitude=self.combined_nc["universals_latitude"], - gps_longitude=self.combined_nc["nal9602_longitude_fix"], - gps_latitude=self.combined_nc["nal9602_latitude_fix"], + gps_longitude=gps_lon, + gps_latitude=gps_lat, logger=self.logger, auv_name="", mission="", diff --git a/src/data/utils.py b/src/data/utils.py index bab58af3..c635da10 100644 --- a/src/data/utils.py +++ b/src/data/utils.py @@ -153,6 +153,7 @@ def nudge_positions( # noqa: C901, PLR0912, PLR0913, PLR0915 lon = nav_longitude lat = nav_latitude + lon_fix = gps_longitude lat_fix = gps_latitude From a5758f3aa27d4c16e8c2b331062cac3e86250e43 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 17:00:35 -0800 Subject: [PATCH 113/121] Check if GPS fix variables exist. --- .vscode/launch.json | 12 +++++++----- src/data/combine.py | 11 +++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 11d94f1e..7b702f43 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -64,7 +64,7 @@ //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot_time", "/longitude_time"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn.nc4", "--plot_time", "/longitude_time"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109_cleaned_by_quinn_latlon.nc4", "--plot_time", "/longitude_time"] - // Conflicting sizes for nudged_time and data + // Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot_time", "/longitude_time"] }, { @@ -125,7 +125,7 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250914T080941/202509140809_202509150109.nc4"] //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--plot"] //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120909T010636/201209090106_201209091521.nc4", "--plot"] - // Conflicting sizes for nudged_time and data + // Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--plot"] }, @@ -348,11 +348,13 @@ //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4", "--no_cleanup", "--clobber"] // Has different universals time coodinates for longitude/latitude and depth //"args": ["-v", "1", "--auv_name", "tethys", "--start", "20120901T000000", "--end", "20121101T000000", "--noinput", "--no_cleanup"] - // Conflicting sizes for nudged_time and data - "args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--no_cleanup" + // Conflicting sizes for nudged_time and data - fixed by filtering GPS fixes to be monotonically increasing + //"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--no_cleanup" //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] - // No nudged latitude and longitude variables + // No nudged latitude and longitude variables - fixed as of 26 Nov 2025 //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"] + // Plankitvore deployment for CeNCOOS Syncro + "args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] }, ] diff --git a/src/data/combine.py b/src/data/combine.py index 9fb05d81..7af50804 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -660,6 +660,17 @@ def _add_nudged_coordinates(self, max_sec_diff_at_end: int = 10) -> None: """Add nudged longitude and latitude variables to the combined dataset.""" self._initial_coordinate_qc() + # Check if GPS fix variables exist + if ( + "nal9602_longitude_fix" not in self.combined_nc + or "nal9602_latitude_fix" not in self.combined_nc + ): + self.logger.warning( + "No GPS fix variables found in combined dataset - " + "skipping nudged coordinate creation" + ) + return + # Ensure GPS fixes have monotonically increasing timestamps gps_lon = self.combined_nc["nal9602_longitude_fix"] gps_lat = self.combined_nc["nal9602_latitude_fix"] From 97cdb417926e5d0e514b65132e9632b9b6ef46fb Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 17:33:16 -0800 Subject: [PATCH 114/121] Skip over InvalidCombinedFile(), i.e. without nudged positions. --- src/data/align.py | 11 +++++++++++ src/data/process.py | 7 +++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/data/align.py b/src/data/align.py index 9c373415..38edc25d 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -460,6 +460,17 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915 nav_coords[coord_type] = coord_var self.logger.info("Found navigation coordinate: %s", coord_var) + # Check for required nudged coordinates + if "nudged_longitude" not in self.combined_nc or "nudged_latitude" not in self.combined_nc: + error_message = ( + f"Required nudged coordinates not found in {src_file}. " + "These are created during combine.py processing when GPS fixes are available. " + "Cannot proceed with alignment without nudged coordinates." + ) + raise InvalidCombinedFile(error_message) + + self.logger.info("Found nudged coordinates: nudged_longitude, nudged_latitude") + # Create interpolators for navigation coordinates try: lat_interp = interp1d( diff --git a/src/data/process.py b/src/data/process.py index f672d83b..5363b1a2 100755 --- a/src/data/process.py +++ b/src/data/process.py @@ -65,7 +65,7 @@ class data are: download_process and calibrate, while for LRAUV class data from pathlib import Path from socket import gethostname -from align import Align_NetCDF, InvalidCalFile +from align import Align_NetCDF, InvalidCalFile, InvalidCombinedFile from archive import LOG_NAME, Archiver from calibrate import EXPECTED_SENSORS, Calibrate_NetCDF from combine import Combine_NetCDF @@ -1044,7 +1044,10 @@ def process_log_files(self) -> None: # Extract AUV name from path self.auv_name = log_file.split("/")[0].lower() self.logger.info("Processing log file: %s", log_file) - self.process_log_file(log_file) + try: + self.process_log_file(log_file) + except (InvalidCalFile, InvalidCombinedFile) as e: + self.logger.warning("%s", e) else: self.logger.error("Must provide either --log_file or both --start and --end arguments") return From ad4299f37487f4b0c8cac46db80f2a4546a29d26 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Wed, 26 Nov 2025 17:42:51 -0800 Subject: [PATCH 115/121] Update documentation to reflect the actual processing done by process_lrauv.py --- LRAUV_WORKFLOW.md | 35 +++++++++++++++++++---------------- README.md | 7 ++++++- TROUBLESHOOTING.md | 15 +++++++++++++++ 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/LRAUV_WORKFLOW.md b/LRAUV_WORKFLOW.md index 4fcfc26f..0bfa68e8 100644 --- a/LRAUV_WORKFLOW.md +++ b/LRAUV_WORKFLOW.md @@ -36,24 +36,27 @@ on the local file system's work directory is as follows: The data in the .nc files are identical to what is in the .nc4 groups. combine.py - Apply calibration coefficients to the original data. The calibrated data - are written to a new netCDF file in the missionnetcdfs/ - directory ending with _cal.nc. This step also includes nudging the - underwater portions of the navigation positions to the GPS fixes - done at the surface and applying pitch corrections to the sensor - depth for those sensors (instruments) for which offset values are - specified in SensorInfo. Some minimal QC is done in this step, namely - removal on non-monotonic times. The record variables in the netCDF - file have only their original coordinates, namely time associated with - them. + Combine all group data into a single NetCDF file with consolidated + time coordinates. When GPS fix data is available, this step includes + nudging the underwater portions of the navigation positions to the + GPS fixes done at the surface. GPS fixes are filtered to ensure + monotonically increasing timestamps before nudging. Some minimal QC + is done in this step, namely removal of non-monotonic times. The + nudged coordinates are added as separate variables (nudged_longitude, + nudged_latitude) with their own time dimension. For missions without + GPS data, the combine step completes successfully but without nudged + coordinates. align.py - Interpolate corrected lat/lon variables to the original sampling - intervals for each instrument's record variables. This format is - analogous to the .nc4 files produced by the LRAUV unserialize - process. These are the best files to use for the highest temporal - resolution of the data. Unlike the .nc4 files align.py's output files - use a naming convention rather than netCDF4 groups for each instrument. + Interpolate nudged lat/lon variables to the original sampling + intervals for each instrument's record variables. This step requires + nudged coordinates from combine.py and will fail with an informative + error if they are not present (as in missions without GPS data). + This format is analogous to the .nc4 files produced by the LRAUV + unserialize process. These are the best files to use for the highest + temporal resolution of the data. Unlike the .nc4 files, align.py's + output files use a naming convention rather than netCDF4 groups for + each instrument. resample.py Produce a netCDF file with all of the instrument's record variables diff --git a/README.md b/README.md index 4861c407..d7a45b50 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,9 @@ print out the usage information for each of the processing scripts: uv run src/data/archive.py --help uv run src/data/process_i2map.py --help uv run src/data/process_dorado.py --help + uv run src/data/process_lrauv.py --help -See [DORADO_WORKFLOW.md](DORADO_WORKFLOW.md) for more details on the data processing workflow. +See [DORADO_WORKFLOW.md](DORADO_WORKFLOW.md) and [LRAUV_WORKFLOW.md](LRAUV_WORKFLOW.md) for more details on the data processing workflows. ### Jupyter Notebooks ### To run the Jupyter Notebooks, start Jupyter Lab at the command line with: @@ -118,6 +119,10 @@ After installation and when logging into the server again mission data can be pr `docker compose run --rm auvpython src/data/process_i2map.py --help` * To actually process a mission and have the processed data copied to the archive use the `-v` and `--clobber` options, e.g.: `docker compose run --rm auvpython src/data/process_dorado.py --mission 2025.139.04 -v --clobber --noinput` +* To process LRAUV data for a specific vehicle and time range: + `docker compose run --rm auvpython src/data/process_lrauv.py --auv_name tethys --start 20250401T000000 --end 20250502T000000 -v --noinput` +* To process a specific LRAUV log file: + `docker compose run --rm auvpython src/data/process_lrauv.py --log_file tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4 -v --noinput` -- diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 7d159a04..196ed21a 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -19,3 +19,18 @@ and make sure that it's the only entry in "process_dorado" that is uncommented. 4. After all of the intermediate files are created any step of the workflow may be executed and debugged in VS Code. The `.vscode\launch.json` file has several example entries that can be modified for specific debugging purposes via the menu in the Run and Debug panel. 5. For example to test bioluminesence proxy corrections a breakpoint can be set in the resample.py file and `4.0 - resample.py` can be debugged for the appropriate mission entered into that section of `.vscode\launch.json`. BTW, I prefer not to have that .json file formatted, so I disable the `json.format.enable` setting in VS Code, or save the file with Cmd-K S. This makes it easier to comment out and enable specific processing to be done. + +## Process LRAUV log files + +1. For LRAUV data, add an entry to `.vscode/launch.json` in the "process_lrauv" section: +``` +"args": ["-v", "1", "--auv_name", "tethys", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--no_cleanup"] +``` +or to process a specific log file: +``` +"args": ["-v", "1", "--log_file", "tethys/missionlogs/2012/20120908_20120920/20120917T025522/201209170255_201209171110.nc4", "--noinput", "--no_cleanup"] +``` + +2. From VS Code's Run and Debug panel select "process_lrauv" and click the green Start Debugging play button. For data to be accessed, the smb://atlas.shore.mbari.org/LRAUV share must be mounted on your computer (typically at /Volumes/LRAUV on macOS). + +3. LRAUV log data will be processed through: nc42netcdfs.py → combine.py → align.py → resample.py as described in [LRAUV_WORKFLOW.md](LRAUV_WORKFLOW.md). Note that missions without GPS fixes will complete combine.py but cannot proceed through align.py as nudged coordinates are required for alignment. From faeea85bef3c36bc4c3f0f520c177ab845cc808c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Fri, 28 Nov 2025 15:32:33 -0800 Subject: [PATCH 116/121] Add _expand_ubat_to_60hz() to save raw ubat data in the combined.nc file. --- .vscode/launch.json | 8 +- src/data/align.py | 26 ++--- src/data/combine.py | 111 +++++++++++++++++++++ src/data/test_process_lrauv.py | 170 +++++++++++++++++++++++++++++++++ 4 files changed, 302 insertions(+), 13 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 7b702f43..2bd38454 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -353,8 +353,12 @@ //"args": ["-v", "1", "--auv_name", "brizo", "--start", "20250915T000000", "--end", "20250917T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // No nudged latitude and longitude variables - fixed as of 26 Nov 2025 //"args": ["-v", "1", "--log_file", "brizo/missionlogs/2025/20250909_20250915/20250915T015535/202509150155_202509151602.nc4", "--no_cleanup"] - // Plankitvore deployment for CeNCOOS Syncro - "args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025 + //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"] + // Full month of June 2025 for Pontus with WetLabsUBAT Group data + //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] }, ] diff --git a/src/data/align.py b/src/data/align.py index 38edc25d..d7747e41 100755 --- a/src/data/align.py +++ b/src/data/align.py @@ -538,20 +538,24 @@ def process_combined(self) -> Path: # noqa: C901, PLR0912, PLR0915 continue # Try to find the corresponding time coordinate - # Look for pattern: group_name + "_time" - possible_time_coords = [] - for i in range(len(var_parts)): - group_candidate = "_".join(var_parts[: i + 1]) - time_coord_candidate = f"{group_candidate}_time" - if time_coord_candidate in self.combined_nc: - possible_time_coords.append((group_candidate, time_coord_candidate)) - - if not possible_time_coords: + # Check what time coordinate the variable actually uses + var_dims = self.combined_nc[variable].dims + var_time_coords = [dim for dim in var_dims if "time" in dim.lower()] + + if not var_time_coords: self.logger.warning("No time coordinate found for variable: %s", variable) continue - # Use the longest matching group name (most specific) - group_name, timevar = max(possible_time_coords, key=lambda x: len(x[0])) + # Use the time coordinate that the variable actually has + timevar = var_time_coords[0] # Should only be one time dimension + # Extract group name from time coordinate + if timevar.endswith("_time_60hz"): + group_name = timevar[:-10] # Remove "_time_60hz" (10 chars) + elif timevar.endswith("_time"): + group_name = timevar[:-5] # Remove "_time" + else: + group_name = timevar + self.logger.debug( "Processing %s with group %s and time %s", variable, group_name, timevar ) diff --git a/src/data/combine.py b/src/data/combine.py index 7af50804..1aba0a49 100755 --- a/src/data/combine.py +++ b/src/data/combine.py @@ -561,6 +561,26 @@ def _create_data_array_for_variable( data_array.attrs = ds[orig_var].attrs.copy() data_array.attrs["units"] = "degrees" data_array.attrs["coordinates"] = f"{dim_name}" + elif len(ds[orig_var].dims) == 2: # noqa: PLR2004 + # Handle 2D arrays (time, array_index) - e.g. biolume_raw, digitized_raw_ad_counts_M + second_dim_name = ds[orig_var].dims[1] + second_dim_size = ds[orig_var].shape[1] + self.logger.debug( + "Reading 2 dimensional %s data arrays with shape %s", + orig_var, + ds[orig_var].shape, + ) + data_array = xr.DataArray( + ds[orig_var].to_numpy(), + dims=[dim_name, second_dim_name], + coords={ + dim_name: time_coord_data, + second_dim_name: np.arange(second_dim_size), + }, + ) + data_array.attrs = ds[orig_var].attrs.copy() + data_array.attrs["comment"] = f"{orig_var} from group {ds.attrs.get('group_name', '')}" + data_array.attrs["coordinates"] = f"{dim_name} {second_dim_name}" else: data_array = xr.DataArray( ds[orig_var].to_numpy(), @@ -625,6 +645,94 @@ def _add_consolidation_comment(self, time_info: dict) -> None: f"Consolidated time coordinate from: {mapping_info}" ) + def _expand_ubat_to_60hz(self) -> None: + """Expand UBAT digitized_raw_ad_counts 2D array into 60hz time series. + + Replaces the 2D array with a 1D 60Hz time series, analogous to how + Dorado biolume_raw is stored with a time60hz coordinate. + """ + ubat_var = "wetlabsubat_digitized_raw_ad_counts" + + if ubat_var not in self.combined_nc: + self.logger.debug( + "No UBAT digitized_raw_ad_counts variable found, skipping 60hz expansion" + ) + return + + self.logger.info("Expanding UBAT %s to 60hz time series", ubat_var) + + # Get the 2D array (time, sample_index) + ubat_2d = self.combined_nc[ubat_var] + + if len(ubat_2d.dims) != 2: # noqa: PLR2004 + self.logger.warning("UBAT variable is not 2D, skipping 60hz expansion") + return + + time_dim = ubat_2d.dims[0] + n_samples = ubat_2d.shape[1] + + # Get the time coordinate + time_coord = self.combined_nc[time_dim] + n_times = len(time_coord) + + # Save original attributes before removing + original_attrs = ubat_2d.attrs.copy() + + # Calculate 60hz time offsets (assuming samples span 1 second) + # Each sample is 1/60th of a second apart + sample_offsets = np.arange(n_samples) / 60.0 + + # Create 60hz time series by adding offsets to each 1Hz time + time_60hz_list = [] + for i in range(n_times): + base_time = time_coord.to_numpy()[i] + # Add offsets to create 60 timestamps per second + times_for_this_second = base_time + sample_offsets + time_60hz_list.append(times_for_this_second) + + # Flatten the arrays + time_60hz = np.concatenate(time_60hz_list) + data_60hz = ubat_2d.to_numpy().flatten() + + # Remove the old 2D variable + del self.combined_nc[ubat_var] + + # Create new 60hz time coordinate with attributes + time_60hz_name = f"{time_dim}_60hz" + time_60hz_coord = xr.DataArray( + time_60hz, + dims=[time_60hz_name], + name=time_60hz_name, + attrs={ + "units": "seconds since 1970-01-01T00:00:00Z", + "standard_name": "time", + "long_name": "Time at 60Hz sampling rate", + }, + ) + + # Create replacement 1D variable with 60hz time coordinate + self.combined_nc[ubat_var] = xr.DataArray( + data_60hz, + coords={time_60hz_name: time_60hz_coord}, + dims=[time_60hz_name], + name=ubat_var, + ) + + # Restore and update attributes + self.combined_nc[ubat_var].attrs = original_attrs + self.combined_nc[ubat_var].attrs["long_name"] = "UBAT digitized raw AD counts at 60Hz" + self.combined_nc[ubat_var].attrs["coordinates"] = time_60hz_name + self.combined_nc[ubat_var].attrs["comment"] = ( + original_attrs.get("comment", "") + " Expanded from 2D to 1D 60Hz time series" + ) + + self.logger.info( + "Replaced 2D %s with 1D 60hz time series: %d samples from %d 1Hz records", + ubat_var, + len(data_60hz), + n_times, + ) + def _initial_coordinate_qc(self) -> None: """Perform initial QC on core coordinate variables for specific log files.""" if self.log_file in ( @@ -784,6 +892,9 @@ def combine_groups(self) -> None: # Collect variable coordinate mapping by group, which can be flattened self.variable_time_coord_mapping.update(time_info["variable_time_coord_mapping"]) + # Expand UBAT 2D arrays to 60hz time series + self._expand_ubat_to_60hz() + # Write intermediate file for cf_xarray decoding intermediate_file = self._intermediate_write_netcdf() with xr.open_dataset(intermediate_file, decode_cf=True) as ds: diff --git a/src/data/test_process_lrauv.py b/src/data/test_process_lrauv.py index bf30d667..1b0c080b 100644 --- a/src/data/test_process_lrauv.py +++ b/src/data/test_process_lrauv.py @@ -143,3 +143,173 @@ def test_lrauv_full_pipeline(complete_lrauv_processing): # This would test the full pipeline but requires significant mocking # of calibration files, configuration, etc. pass # noqa: PIE790 + + +def test_lrauv_2d_array_variable_handling(tmp_path): + """Test that 2D array variables (time, array_index) are handled correctly in combine.py.""" + from combine import Combine_NetCDF + + # Create a minimal test that exercises the _create_data_array_for_variable method + # with a 2D variable + + # Create time array + time_vals = np.arange( + np.datetime64("2025-06-08T02:00:00"), + np.datetime64("2025-06-08T03:00:00"), + np.timedelta64(10, "s"), # 360 time points + ) + + # Create a mock dataset with a 2D variable + ds = xr.Dataset( + { + # 2D variable - 60 samples per time point (like biolume_raw) + "biolume_array": (["time", "sample"], np.random.uniform(0, 100, (len(time_vals), 60))), + # 1D variable for comparison + "temperature": (["time"], np.random.uniform(10, 15, len(time_vals))), + }, + coords={"time": time_vals}, + ) + + # Create a Combine_NetCDF instance (minimal setup) + combine = Combine_NetCDF( + log_file="test/test.nc4", + verbose=1, + ) + + # Mock the time coordinate data + time_coord_data = time_vals.astype("datetime64[ns]").astype("int64") / 1e9 + + # Test 1D variable (should work) + data_array_1d = combine._create_data_array_for_variable( + ds, "temperature", "test_time", time_coord_data + ) + assert len(data_array_1d.dims) == 1 # noqa: PLR2004, S101 + assert data_array_1d.dims[0] == "test_time" # noqa: S101 + + # Test 2D variable (this is what fails without the fix) + try: + data_array_2d = combine._create_data_array_for_variable( + ds, "biolume_array", "test_time", time_coord_data + ) + # After the fix, this should work + assert len(data_array_2d.dims) == 2 # noqa: PLR2004, S101 + assert "test_time" in data_array_2d.dims # noqa: S101 + assert data_array_2d.shape[1] == 60 # noqa: PLR2004, S101 # Second dimension should be 60 + except ValueError as e: + if "different number of dimensions" in str(e): + pytest.fail(f"2D array handling not implemented: {e}") + raise + + +def test_ubat_60hz_expansion(tmp_path): + """Test that UBAT 2D digitized_raw_ad_counts array is expanded to 60hz time series.""" + from combine import Combine_NetCDF + + # Create time array for 1Hz data + time_vals = np.arange( + np.datetime64("2025-06-08T02:00:00"), + np.datetime64("2025-06-08T02:00:10"), # 10 seconds + np.timedelta64(1, "s"), + ) + time_seconds = time_vals.astype("datetime64[ns]").astype("int64") / 1e9 + + # Create a Combine_NetCDF instance + combine = Combine_NetCDF( + log_file="test/test.nc4", + verbose=1, + ) + + # Create mock combined_nc with UBAT 2D data + combine.combined_nc = xr.Dataset( + { + "wetlabsubat_digitized_raw_ad_counts": ( + ["wetlabsubat_time", "sample"], + np.random.randint(0, 1000, (len(time_vals), 60)), + ), + }, + coords={"wetlabsubat_time": time_seconds}, + ) + + # Add attributes to match real data + combine.combined_nc["wetlabsubat_digitized_raw_ad_counts"].attrs = { + "long_name": "Digitized raw AD counts", + "comment": "Test UBAT data", + } + + # Call the expansion method + combine._expand_ubat_to_60hz() + + # Check that the original variable is now 1D with 60hz time coordinate + # (analogous to Dorado biolume_raw with TIME60HZ) + assert "wetlabsubat_digitized_raw_ad_counts" in combine.combined_nc # noqa: S101 + assert "wetlabsubat_time_60hz" in combine.combined_nc # noqa: S101 + + # Check dimensions - should now be 1D with 60hz time + ubat_var = combine.combined_nc["wetlabsubat_digitized_raw_ad_counts"] + assert len(ubat_var.dims) == 1 # noqa: PLR2004, S101 + assert ubat_var.dims[0] == "wetlabsubat_time_60hz" # noqa: S101 + + # Check shape - should have 60 samples per second, so 10 seconds * 60 = 600 samples + expected_samples = len(time_vals) * 60 # noqa: PLR2004 + assert len(ubat_var) == expected_samples # noqa: S101 + + # Check time coordinate has proper attributes + time_60hz = combine.combined_nc["wetlabsubat_time_60hz"] + assert time_60hz.attrs["units"] == "seconds since 1970-01-01T00:00:00Z" # noqa: S101 + assert time_60hz.attrs["standard_name"] == "time" # noqa: S101 + + # Check attributes were copied + assert "long_name" in ubat_var.attrs # noqa: S101 + assert "coordinates" in ubat_var.attrs # noqa: S101 + + +def _find_time_coordinate(variable: str, combined_nc_vars: dict) -> str: + """Helper to find time coordinate for a variable (mimics align.py logic).""" + var_parts = variable.split("_") + possible_time_coords = [] + + for i in range(len(var_parts)): + group_candidate = "_".join(var_parts[: i + 1]) + for suffix in ["_time", "_time_60hz"]: + time_coord = f"{group_candidate}{suffix}" + if time_coord in combined_nc_vars: + possible_time_coords.append((group_candidate, time_coord)) + + if not possible_time_coords: + return None + + # For 60hz variables, prefer 60hz time coordinates + has_60hz_time = any(tc[1].endswith("_60hz") for tc in possible_time_coords) + if variable.endswith("_60hz") and has_60hz_time: + time_60hz_coords = [(g, t) for g, t in possible_time_coords if t.endswith("_60hz")] + return max(time_60hz_coords, key=lambda x: len(x[0]))[1] + + # For regular variables, prefer non-60hz time coordinates + non_60hz_coords = [(g, t) for g, t in possible_time_coords if not t.endswith("_60hz")] + if non_60hz_coords: + return max(non_60hz_coords, key=lambda x: len(x[0]))[1] + + return max(possible_time_coords, key=lambda x: len(x[0]))[1] + + +def test_align_60hz_time_coordinate_matching(): + """Test that variables with 60hz time coordinates are matched correctly.""" + # Mock dataset with both regular and 60hz time coordinates + combined_nc_vars = { + "wetlabsubat_time": True, + "wetlabsubat_time_60hz": True, + } + + # Test 1: Regular variable should match regular time coordinate + timevar = _find_time_coordinate("wetlabsubat_flow_rate", combined_nc_vars) + assert timevar == "wetlabsubat_time" # noqa: S101 + assert not timevar.endswith("_60hz") # noqa: S101 + + # Test 2: UBAT variable (now 1D with 60hz time) should match 60hz time coordinate + # Note: After expansion in combine.py, wetlabsubat_digitized_raw_ad_counts + # has coordinate wetlabsubat_time_60hz (variable name has NO _60hz suffix) + timevar = _find_time_coordinate("wetlabsubat_digitized_raw_ad_counts", combined_nc_vars) + # This will match wetlabsubat_time (the regular one) because the variable name + # doesn't have _60hz suffix. The actual coordinate binding happens in align.py + # by reading the variable's coordinate, not by name matching. + assert timevar == "wetlabsubat_time" # noqa: S101 From d9438f360491bc1cb476f3aa4d23d5c2311b929c Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 1 Dec 2025 09:50:16 -0800 Subject: [PATCH 117/121] Remove "rename" from _PARMS dictionaries This change clearly keeps the original variable names, but in lower case. --- .vscode/launch.json | 3 +- src/data/nc42netcdfs.py | 161 +++++++++++++--------------------------- 2 files changed, 53 insertions(+), 111 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 2bd38454..ca7cd39c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -191,7 +191,8 @@ //"args": ["--auv_name", "dorado", "--mission", "2020.337.00", "-v", "1"], //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"], //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] - "args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"], + //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"], + "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"], }, { "name": "5.0 - archive.py", diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index b188c234..f3105418 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -39,117 +39,79 @@ SCI_PARMS = { "/": [ - { - "name": "concentration_of_colored_dissolved_organic_matter_in_sea_water", - "rename": "colored_dissolved_organic_matter", - }, - {"name": "longitude", "rename": "longitude"}, - {"name": "latitude", "rename": "latitude"}, - {"name": "depth", "rename": "depth"}, - {"name": "time", "rename": "time"}, + {"name": "concentration_of_colored_dissolved_organic_matter_in_sea_water"}, + {"name": "longitude"}, + {"name": "latitude"}, + {"name": "depth"}, + {"name": "time"}, ], - "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water", "rename": "oxygen"}], + "Aanderaa_O2": [{"name": "mass_concentration_of_oxygen_in_sea_water"}], "CTD_NeilBrown": [ - {"name": "sea_water_salinity", "rename": "salinity"}, - {"name": "sea_water_temperature", "rename": "temperature"}, + {"name": "sea_water_salinity"}, + {"name": "sea_water_temperature"}, ], "CTD_Seabird": [ - {"name": "sea_water_salinity", "rename": "salinity"}, - {"name": "sea_water_temperature", "rename": "temperature"}, - { - "name": "mass_concentration_of_oxygen_in_sea_water", - "rename": "mass_concentration_of_oxygen_in_sea_water", - }, + {"name": "sea_water_salinity"}, + {"name": "sea_water_temperature"}, + {"name": "mass_concentration_of_oxygen_in_sea_water"}, ], - "ISUS": [{"name": "mole_concentration_of_nitrate_in_sea_water", "rename": "nitrate"}], - "PAR_Licor": [{"name": "downwelling_photosynthetic_photon_flux_in_sea_water", "rename": "PAR"}], + "ISUS": [{"name": "mole_concentration_of_nitrate_in_sea_water"}], + "PAR_Licor": [{"name": "downwelling_photosynthetic_photon_flux_in_sea_water"}], "WetLabsBB2FL": [ - {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"}, - {"name": "OutputChl", "rename": "chl"}, - {"name": "Output470", "rename": "bbp470"}, - {"name": "Output650", "rename": "bbp650"}, - {"name": "VolumeScatCoeff117deg470nm", "rename": "volumescatcoeff117deg470nm"}, - {"name": "VolumeScatCoeff117deg650nm", "rename": "volumescatcoeff117deg650nm"}, - { - "name": "ParticulateBackscatteringCoeff470nm", - "rename": "particulatebackscatteringcoeff470nm", - }, - { - "name": "ParticulateBackscatteringCoeff650nm", - "rename": "particulatebackscatteringcoeff650nm", - }, + {"name": "mass_concentration_of_chlorophyll_in_sea_water"}, + {"name": "OutputChl"}, + {"name": "Output470"}, + {"name": "Output650"}, + {"name": "VolumeScatCoeff117deg470nm"}, + {"name": "VolumeScatCoeff117deg650nm"}, + {"name": "ParticulateBackscatteringCoeff470nm"}, + {"name": "ParticulateBackscatteringCoeff650nm"}, ], "WetLabsSeaOWL_UV_A": [ - { - "name": "concentration_of_chromophoric_dissolved_organic_matter_in_sea_water", - "rename": "chromophoric_dissolved_organic_matter", - }, - {"name": "mass_concentration_of_chlorophyll_in_sea_water", "rename": "chlorophyll"}, - {"name": "BackscatteringCoeff700nm", "rename": "BackscatteringCoeff700nm"}, - {"name": "VolumeScatCoeff117deg700nm", "rename": "VolumeScatCoeff117deg700nm"}, - { - "name": "mass_concentration_of_petroleum_hydrocarbons_in_sea_water", - "rename": "petroleum_hydrocarbons", - }, + {"name": "concentration_of_chromophoric_dissolved_organic_matter_in_sea_water"}, + {"name": "mass_concentration_of_chlorophyll_in_sea_water"}, + {"name": "BackscatteringCoeff700nm"}, + {"name": "VolumeScatCoeff117deg700nm"}, + {"name": "mass_concentration_of_petroleum_hydrocarbons_in_sea_water"}, ], "WetLabsUBAT": [ - {"name": "average_bioluminescence", "rename": "average_bioluminescence"}, - {"name": "flow_rate", "rename": "ubat_flow_rate"}, - {"name": "digitized_raw_ad_counts", "rename": "digitized_raw_ad_counts"}, + {"name": "average_bioluminescence"}, + {"name": "flow_rate"}, + {"name": "digitized_raw_ad_counts"}, ], } ENG_PARMS = { "BPC1": [ - {"name": "platform_battery_charge", "rename": "health_platform_battery_charge"}, - {"name": "platform_battery_voltage", "rename": "health_platform_average_voltage"}, - ], - "BuoyancyServo": [ - {"name": "platform_buoyancy_position", "rename": "control_inputs_buoyancy_position"} + {"name": "platform_battery_charge"}, + {"name": "platform_battery_voltage"}, ], + "BuoyancyServo": [{"name": "platform_buoyancy_position"}], "DeadReckonUsingMultipleVelocitySources": [ - { - "name": "fix_residual_percent_distance_traveled", - "rename": ( - "fix_residual_percent_distance_traveled_DeadReckonUsingMultipleVelocitySources" - ), - }, - {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingMultipleVelocitySources"}, - {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingMultipleVelocitySources"}, - {"name": "depth", "rename": "pose_depth_DeadReckonUsingMultipleVelocitySources"}, + {"name": "fix_residual_percent_distance_traveled"}, + {"name": "longitude"}, + {"name": "latitude"}, + {"name": "depth"}, ], "DeadReckonUsingSpeedCalculator": [ - { - "name": "fix_residual_percent_distance_traveled", - "rename": "fix_residual_percent_distance_traveled_DeadReckonUsingSpeedCalculator", - }, - {"name": "longitude", "rename": "pose_longitude_DeadReckonUsingSpeedCalculator"}, - {"name": "latitude", "rename": "pose_latitude_DeadReckonUsingSpeedCalculator"}, - {"name": "depth", "rename": "pose_depth_DeadReckonUsingSpeedCalculator"}, + {"name": "fix_residual_percent_distance_traveled"}, + {"name": "longitude"}, + {"name": "latitude"}, + {"name": "depth"}, ], - "ElevatorServo": [ - {"name": "platform_elevator_angle", "rename": "control_inputs_elevator_angle"} - ], - "MassServo": [{"name": "platform_mass_position", "rename": "control_inputs_mass_position"}], + "ElevatorServo": [{"name": "platform_elevator_angle"}], + "MassServo": [{"name": "platform_mass_position"}], "NAL9602": [ - {"name": "time_fix", "rename": "fix_time"}, - {"name": "latitude_fix", "rename": "fix_latitude"}, - {"name": "longitude_fix", "rename": "fix_longitude"}, - ], - "Onboard": [{"name": "platform_average_current", "rename": "health_platform_average_current"}], - "RudderServo": [{"name": "platform_rudder_angle", "rename": "control_inputs_rudder_angle"}], - "ThrusterServo": [ - { - "name": "platform_propeller_rotation_rate", - "rename": "control_inputs_propeller_rotation_rate", - } + {"name": "time_fix"}, + {"name": "latitude_fix"}, + {"name": "longitude_fix"}, ], + "Onboard": [{"name": "platform_average_current"}], + "RudderServo": [{"name": "platform_rudder_angle"}], + "ThrusterServo": [{"name": "platform_propeller_rotation_rate"}], "CurrentEstimator": [ - { - "name": "current_direction_navigation_frame", - "rename": "current_direction_navigation_frame", - }, - {"name": "current_speed_navigation_frame", "rename": "current_speed_navigation_frame"}, + {"name": "current_direction_navigation_frame"}, + {"name": "current_speed_navigation_frame"}, ], } @@ -192,16 +154,6 @@ def __init__( # noqa: PLR0913 self.verbose = verbose self.commandline = commandline - def show_variable_mapping(self): - """Show the variable mapping.""" - for group, parms in sorted(SCIENG_PARMS.items()): - print(f"Group: {group}") # noqa: T201 - for parm in parms: - name = parm.get("name", "N/A") - rename = parm.get("rename", "N/A") - print(f" {name} -> {rename}") # noqa: T201 - print() # noqa: T201 - def download_with_pooch(self, url, local_dir, known_hash=None): """Download using pooch with caching and verification.""" downloader = pooch.HTTPDownloader(timeout=(60, 300), progressbar=True) @@ -1197,13 +1149,6 @@ def process_command_line(self): "d1235ead55023bea05e9841465d54a45dfab007a283320322e28b84438fb8a85" ), ) - ( - parser.add_argument( - "--show_variable_mapping", - action="store_true", - help="Show the variable mapping: Group/variable_names -> their_renames", - ), - ) parser.add_argument( "--plot_time", action="store", @@ -1229,8 +1174,4 @@ def process_command_line(self): if __name__ == "__main__": extract = Extract() extract.process_command_line() - if extract.args.show_variable_mapping: - extract.show_variable_mapping() - sys.exit(0) - else: - extract.extract_groups_to_files_netcdf4(extract.args.log_file) + extract.extract_groups_to_files_netcdf4(extract.args.log_file) From 678bb42881887fe4c365264e0175a30617c728e7 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 1 Dec 2025 20:23:46 -0800 Subject: [PATCH 118/121] Add add_wetlabsubat_proxies() to compute biolume proxies for lrauv data. Also added _find_lat_lon_variables() for finding nav whether its dorado or lrauv. Also added test for lrauv ubat processing. --- .vscode/launch.json | 4 +- src/data/test_process_lrauv.py | 102 +++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index ca7cd39c..bb55b3d5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -357,9 +357,9 @@ // Plankitvore deployment for CeNCOOS Syncro - whole month of April 2025 //"args": ["-v", "1", "--auv_name", "ahi", "--start", "20250401T000000", "--end", "20250502T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable - "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"] + //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] }, ] diff --git a/src/data/test_process_lrauv.py b/src/data/test_process_lrauv.py index 1b0c080b..06e298b2 100644 --- a/src/data/test_process_lrauv.py +++ b/src/data/test_process_lrauv.py @@ -1,6 +1,7 @@ # noqa: INP001 import numpy as np +import pandas as pd import pytest import xarray as xr @@ -313,3 +314,104 @@ def test_align_60hz_time_coordinate_matching(): # doesn't have _60hz suffix. The actual coordinate binding happens in align.py # by reading the variable's coordinate, not by name matching. assert timevar == "wetlabsubat_time" # noqa: S101 + + +def test_wetlabsubat_proxy_processing_with_realistic_coordinates(tmp_path): + """Test add_wetlabsubat_proxies with realistic LRAUV coordinate variable names. + + Real LRAUV data has instrument-prefixed coordinates like: + - parlicor_latitude, parlicor_longitude + - massservo_latitude, massservo_longitude + - nudged_latitude, nudged_longitude + - onboard_latitude, onboard_longitude + - wetlabsubat_latitude, wetlabsubat_longitude + + But NOT navigation_latitude/navigation_longitude (which exist in Dorado data). + This test ensures the coordinate lookup doesn't fail when navigation_* are missing. + """ + from resample import Resampler + + # Create time arrays + time_vals = pd.date_range("2025-06-08 02:00:00", periods=3600, freq="1s") # 1 hour + time_60hz_vals = pd.date_range("2025-06-08 02:00:00", periods=3600 * 60, freq="16666667ns") + + # Create a mock dataset with realistic LRAUV structure + # Key: NO navigation_latitude/navigation_longitude variables + ds = xr.Dataset( + { + # UBAT 60Hz raw data (after expansion from 2D to 1D) + "wetlabsubat_digitized_raw_ad_counts": ( + ["wetlabsubat_time_60hz"], + np.random.randint(200, 800, len(time_60hz_vals)), + ), + # Regular 1Hz variables + "wetlabsubat_flow_rate": ( + ["wetlabsubat_time"], + np.full(len(time_vals), 350.0), + ), + "wetlabsbb2fl_fluorescence": ( + ["wetlabsbb2fl_time"], + np.random.uniform(0, 5, len(time_vals)), + ), + # Realistic coordinate variables - instrument-prefixed, NO navigation_* + "nudged_latitude": (["nudged_time"], np.full(len(time_vals), 36.8)), + "nudged_longitude": (["nudged_time"], np.full(len(time_vals), -122.0)), + "onboard_latitude": (["onboard_time"], np.full(len(time_vals), 36.8)), + "onboard_longitude": (["onboard_time"], np.full(len(time_vals), -122.0)), + "wetlabsubat_latitude": ( + ["wetlabsubat_time"], + np.full(len(time_vals), 36.8), + ), + "wetlabsubat_longitude": ( + ["wetlabsubat_time"], + np.full(len(time_vals), -122.0), + ), + }, + coords={ + "wetlabsubat_time": time_vals.to_numpy(), + "wetlabsubat_time_60hz": time_60hz_vals.to_numpy(), + "wetlabsbb2fl_time": time_vals.to_numpy(), + "nudged_time": time_vals.to_numpy(), + "onboard_time": time_vals.to_numpy(), + }, + ) + + # Add attributes + ds["wetlabsubat_digitized_raw_ad_counts"].attrs = { + "long_name": "Digitized raw AD counts", + "units": "counts", + } + ds["nudged_latitude"].attrs = {"standard_name": "latitude", "units": "degrees_north"} + ds["nudged_longitude"].attrs = {"standard_name": "longitude", "units": "degrees_east"} + + # Create Resampler instance + resampler = Resampler( + auv_name="pontus", + log_file=None, + freq="1S", + verbose=0, + ) + + # Set the dataset + resampler.ds = ds + resampler.df_r = pd.DataFrame(index=time_vals) + + # Create mock resampled_nc (would normally be created by resample_variable) + resampler.resampled_nc = xr.Dataset(coords={"time": time_vals.to_numpy()}) + resampler.resampled_nc["wetlabsbb2fl_fluorescence"] = ( + ["time"], + np.random.uniform(0, 5, len(time_vals)), + ) + + # This should NOT raise KeyError for navigation_latitude/navigation_longitude + # The method should find nudged_latitude/longitude or another available coordinate + try: + resampler.add_wetlabsubat_proxies(freq="1S") + # If we get here, the coordinate lookup worked + assert True # noqa: S101 + except KeyError as e: + if "navigation_latitude" in str(e) or "navigation_longitude" in str(e): + pytest.fail( + f"Coordinate lookup failed - should find alternative to navigation_* variables: {e}" + ) + raise From f2da1aa3ba2dddded100d5d66ab4a70053e7fcfc Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Mon, 1 Dec 2025 20:24:29 -0800 Subject: [PATCH 119/121] Add add_wetlabsubat_proxies() to compute biolume proxies for lrauv data. Also added _find_lat_lon_variables() for finding nav whether its dorado or lrauv. Also added test for lrauv ubat processing. --- src/data/resample.py | 421 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 415 insertions(+), 6 deletions(-) diff --git a/src/data/resample.py b/src/data/resample.py index 08b859c5..1d467b9d 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -488,11 +488,14 @@ def select_nighttime_bl_raw( sunsets: A list of sunset times for each night. sunrises: A list of sunrise times for each night. """ - lat = float(self.ds["navigation_latitude"].median()) - lon = float(self.ds["navigation_longitude"].median()) + lat_var, lon_var = self._find_lat_lon_variables() + lat = float(self.ds[lat_var].median()) + lon = float(self.ds[lon_var].median()) self.logger.debug("Getting sun altitudes for nighttime selection") sun_alts = [] - for ts in self.ds["navigation_time"].to_numpy()[::stride]: + # Get the time coordinate for the latitude variable + time_coord = self.ds[lat_var].dims[0] + for ts in self.ds[time_coord].to_numpy()[::stride]: # About 10-minute resolution from 5 Hz navigation data sun_alts.append( # noqa: PERF401 get_altitude( @@ -504,9 +507,7 @@ def select_nighttime_bl_raw( # Find sunset and sunrise - where sun altitude changes sign sign_changes = np.where(np.diff(np.sign(sun_alts)))[0] - ss_sr_times = ( - self.ds["navigation_time"].isel({"navigation_time": sign_changes * stride}).to_numpy() - ) + ss_sr_times = self.ds[time_coord].isel({time_coord: sign_changes * stride}).to_numpy() self.logger.debug("Sunset and sunrise times: %s", ss_sr_times) sunsets = [] @@ -547,6 +548,43 @@ def select_nighttime_bl_raw( self.logger.info("No sunset or sunrise found during this mission.") return nighttime_bl_raw, sunsets, sunrises + def _find_lat_lon_variables(self) -> tuple[str, str]: + """Find latitude and longitude variables in the dataset. + + Searches for variables ending in _latitude and _longitude. + Prefers navigation_, nudged_, or onboard_ prefixes in that order. + + Returns: + tuple: (lat_var_name, lon_var_name) + + Raises: + KeyError: If no latitude/longitude variables are found + """ + lat_vars = [v for v in self.ds.variables if v.endswith("_latitude")] + lon_vars = [v for v in self.ds.variables if v.endswith("_longitude")] + + if not lat_vars or not lon_vars: + msg = ( + f"No latitude/longitude variables found. " + f"Available variables: {list(self.ds.variables.keys())}" + ) + raise KeyError(msg) + + # Prefer navigation_, then nudged_, then onboard_, then any other + for prefix in ["navigation_", "nudged_", "onboard_"]: + for lat_var in lat_vars: + if lat_var.startswith(prefix): + lon_var = prefix + "longitude" + if lon_var in lon_vars: + self.logger.debug("Using %s and %s for coordinates", lat_var, lon_var) + return lat_var, lon_var + + # Fall back to first available pair + lat_var = lat_vars[0] + lon_var = lon_vars[0] + self.logger.info("Using first available coordinates: %s and %s", lat_var, lon_var) + return lat_var, lon_var + def add_profile(self, depth_threshold: float) -> None: # Find depth vertices value using scipy's find_peaks algorithm options = {"prominence": 10, "width": 30} @@ -860,6 +898,345 @@ def add_biolume_proxies( # noqa: PLR0913, PLR0915 return fluo, sunsets, sunrises + def add_wetlabsubat_proxies( # noqa: PLR0913, PLR0915, C901, PLR0912 + self, + freq, + window_size_secs: int = 5, + envelope_mini: float = 1.5e10, + flash_threshold: float = FLASH_THRESHOLD, + proxy_ratio_adinos: float = 3.9811e13, # Default value for LRAUV + proxy_cal_factor: float = 0.00470, # Default value for LRAUV + ) -> tuple[pd.Series, list[datetime], list[datetime]]: + """Add biolume proxy variables computed from wetlabsubat_digitized_raw_ad_counts. + + This is parallel to add_biolume_proxies() but for LRAUV wetlabsubat data. + Computations follow Appendix B in Messie et al. 2019. + https://www.sciencedirect.com/science/article/pii/S0079661118300478 + """ + self.logger.info( + "Adding wetlabsubat proxy variables computed from wetlabsubat_digitized_raw_ad_counts" + ) + sample_rate = 60 # Assume all digitized_raw_ad_counts data is sampled at 60 Hz + window_size = window_size_secs * sample_rate + + # s_ubat_raw includes daytime data - see below for nighttime data + s_ubat_raw = self.ds["wetlabsubat_digitized_raw_ad_counts"].to_pandas().dropna() + + # Compute background biolumenesence envelope + self.logger.debug("Applying rolling min filter") + min_bg_unsmoothed = s_ubat_raw.rolling( + window_size, + min_periods=0, + center=True, + ).min() + min_bg = ( + min_bg_unsmoothed.rolling(window_size, min_periods=0, center=True).mean().to_numpy() + ) + + self.logger.debug("Applying rolling median filter") + med_bg_unsmoothed = s_ubat_raw.rolling( + window_size, + min_periods=0, + center=True, + ).median() + s_med_bg = med_bg_unsmoothed.rolling( + window_size, + min_periods=0, + center=True, + ).mean() + med_bg = s_med_bg.to_numpy() + max_bg = med_bg * 2.0 - min_bg + # envelope_mini: minimum value for the envelope (max_bgrd - med_bgrd) + # to avoid very dim flashes when the background is low + max_bg[max_bg - med_bg < envelope_mini] = ( + med_bg[max_bg - med_bg < envelope_mini] + envelope_mini + ) + + # Find the high and low peaks + self.logger.debug("Finding peaks") + peaks, _ = signal.find_peaks(s_ubat_raw, height=max_bg) + s_peaks = pd.Series(s_ubat_raw.iloc[peaks], index=s_ubat_raw.index[peaks]) + s_med_bg_peaks = pd.Series(s_med_bg.iloc[peaks], index=s_ubat_raw.index[peaks]) + if self.flash_threshold: + flash_threshold = self.flash_threshold + flash_threshold_note = f"Computed with flash_threshold = {flash_threshold:.0e}" + self.logger.info("Using flash_threshold = %.4e", flash_threshold) + nbflash_high = s_peaks[s_peaks > (s_med_bg_peaks + flash_threshold)] + nbflash_low = s_peaks[s_peaks <= (s_med_bg_peaks + flash_threshold)] + + # Construct full time series of flashes with NaNs for non-flash values + s_nbflash_high = pd.Series(np.nan, index=s_ubat_raw.index) + s_nbflash_high.loc[nbflash_high.index] = nbflash_high + s_nbflash_low = pd.Series(np.nan, index=s_ubat_raw.index) + s_nbflash_low.loc[nbflash_low.index] = nbflash_low + + # Count the number of flashes per second - use 15 second window stepping every second + flash_count_seconds = 15 + flash_window = flash_count_seconds * sample_rate + self.logger.debug("Counting flashes using %d second window", flash_count_seconds) + nbflash_high_counts = ( + s_nbflash_high.rolling(flash_window, step=1, min_periods=0, center=True) + .count() + .resample(freq.lower()) + .mean() + / flash_count_seconds + ) + nbflash_low_counts = ( + s_nbflash_low.rolling(flash_window, step=1, min_periods=0, center=True) + .count() + .resample(freq.lower()) + .mean() + / flash_count_seconds + ) + + # Get flow data - try both flow_rate and flow variable names + flow = None + if "wetlabsubat_flow_rate" in self.ds: + flow = ( + self.ds[["wetlabsubat_flow_rate"]]["wetlabsubat_flow_rate"] + .to_pandas() + .resample("1s") + .mean() + .ffill() + ) + self.logger.info("Using wetlabsubat_flow_rate for flow calculations") + elif "wetlabsubat_flow" in self.ds: + flow = ( + self.ds[["wetlabsubat_flow"]]["wetlabsubat_flow"] + .to_pandas() + .resample("1s") + .mean() + .ffill() + ) + self.logger.info("Using wetlabsubat_flow for flow calculations") + + # Flow sensor is not always on or may not be present, fill in 0.0 values with 350 ml/s + zero_note = "" + if flow is None: + self.logger.info("No flow data found - using constant 350 ml/s") + # Create flow series with same index as resampled data + flow = pd.Series(350.0, index=nbflash_high_counts.index) + zero_note = "No flow data available - used constant 350 ml/s" + else: + num_zero_flow = len(np.where(flow == 0)[0]) + if num_zero_flow > 0: + zero_note = ( + f"Zero flow values found: {num_zero_flow} of {len(flow)} " + f"- replaced with 350 ml/s" + ) + self.logger.info(zero_note) + flow = flow.replace(0.0, 350.0) + + # Compute flashes per liter - pandas.Series.divide() will match indexes + # Units: flashes per liter = (flashes per second / mL/s) * 1000 mL/L + self.logger.info( + "Computing flashes per liter: wetlabsubat_nbflash_high, wetlabsubat_nbflash_low" + ) + self.df_r["wetlabsubat_nbflash_high"] = nbflash_high_counts.divide(flow) * 1000 + self.df_r["wetlabsubat_nbflash_high"].attrs["long_name"] = ( + "High intensity flashes (copepods proxy)" + ) + self.df_r["wetlabsubat_nbflash_high"].attrs["units"] = "flashes/liter" + self.df_r["wetlabsubat_nbflash_high"].attrs["comment"] = ( + f"{zero_note} - {flash_threshold_note}" + ) + + self.df_r["wetlabsubat_nbflash_low"] = nbflash_low_counts.divide(flow) * 1000 + self.df_r["wetlabsubat_nbflash_low"].attrs["long_name"] = ( + "Low intensity flashes (Larvacean proxy)" + ) + self.df_r["wetlabsubat_nbflash_low"].attrs["units"] = "flashes/liter" + self.df_r["wetlabsubat_nbflash_low"].attrs["comment"] = ( + f"{zero_note} - {flash_threshold_note}" + ) + + # Flash intensity - proxy for small jellies - for entire mission, not just nightime + all_raw = self.ds[["wetlabsubat_digitized_raw_ad_counts"]][ + "wetlabsubat_digitized_raw_ad_counts" + ].to_pandas() + med_bg_60 = pd.Series( + np.interp(all_raw.index, s_med_bg.index, med_bg), + index=all_raw.index, + ) + intflash = ( + (all_raw - med_bg_60) + .rolling(flash_window, min_periods=0, center=True) + .max() + .resample("1s") + .mean() + ) + self.logger.info( + "Saving flash intensity: wetlabsubat_intflash - " + "the upper bound of the background envelope" + ) + self.df_r["wetlabsubat_intflash"] = intflash + self.df_r["wetlabsubat_intflash"].attrs["long_name"] = ( + "Flashes intensity (small jellies proxy)" + ) + self.df_r["wetlabsubat_intflash"].attrs["units"] = "counts" + self.df_r["wetlabsubat_intflash"].attrs["comment"] = ( + f"intensity of flashes from {sample_rate} Hz " + f"wetlabsubat_digitized_raw_ad_counts variable in {freq} intervals." + ) + + # Make min_bg a 1S pd.Series so that we can divide by flow, matching indexes + s_min_bg = min_bg_unsmoothed.rolling( + window_size, + min_periods=0, + center=True, + ).mean() + bg_biolume = pd.Series(s_min_bg, index=s_ubat_raw.index).resample("1s").mean() + self.logger.info("Saving Background bioluminescence (dinoflagellates proxy)") + self.df_r["wetlabsubat_bg_biolume"] = bg_biolume.divide(flow) * 1000 + self.df_r["wetlabsubat_bg_biolume"].attrs["long_name"] = ( + "Background bioluminescence (dinoflagellates proxy)" + ) + self.df_r["wetlabsubat_bg_biolume"].attrs["units"] = "counts/liter" + self.df_r["wetlabsubat_bg_biolume"].attrs["comment"] = zero_note + + fluo = None + nighttime_ubat_raw, sunsets, sunrises = self.select_nighttime_ubat_raw() + if nighttime_ubat_raw.empty: + self.logger.info( + "No nighttime wetlabsubat data to compute adinos, diatoms, hdinos proxies", + ) + else: + # (2) Phytoplankton proxies - look for wetlabsbb2fl fluorescence/chlorophyll data + fluo_var = None + for var in self.resampled_nc.variables: + if "wetlabsbb2fl" in var.lower() and ( + "fl" in var.lower() or "chlorophyll" in var.lower() + ): + fluo_var = var + break + + if fluo_var is None: + self.logger.info( + "No wetlabsbb2fl fluorescence data found. " + "Not computing adinos, diatoms, and hdinos" + ) + return fluo, sunsets, sunrises + + self.logger.info("Using %s for phytoplankton proxy calculations", fluo_var) + fluo = ( + self.resampled_nc[fluo_var] + .where( + (self.resampled_nc["time"] > min(sunsets)) + & (self.resampled_nc["time"] < max(sunrises)), + ) + .to_pandas() + .resample(freq.lower()) + .mean() + ) + # Set negative values from fluorescence to NaN + fluo[fluo < 0] = np.nan + self.logger.info("Using proxy_ratio_adinos = %.4e", proxy_ratio_adinos) + self.logger.info("Using proxy_cal_factor = %.6f", proxy_cal_factor) + + nighttime_bg_biolume = ( + pd.Series(s_min_bg, index=nighttime_ubat_raw.index).resample("1s").mean() + ) + nighttime_bg_biolume_perliter = nighttime_bg_biolume.divide(flow) * 1000 + pseudo_fluorescence = nighttime_bg_biolume_perliter / proxy_ratio_adinos + self.df_r["wetlabsubat_proxy_adinos"] = ( + np.minimum(fluo, pseudo_fluorescence) / proxy_cal_factor + ) + self.df_r["wetlabsubat_proxy_adinos"].attrs["comment"] = ( + f"Autotrophic dinoflagellate proxy using proxy_ratio_adinos" + f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}" + ) + self.df_r["wetlabsubat_proxy_hdinos"] = ( + pseudo_fluorescence - np.minimum(fluo, pseudo_fluorescence) + ) / proxy_cal_factor + self.df_r["wetlabsubat_proxy_hdinos"].attrs["comment"] = ( + f"Heterotrophic dinoflagellate proxy using proxy_ratio_adinos" + f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}" + ) + wetlabsubat_proxy_diatoms = (fluo - pseudo_fluorescence) / proxy_cal_factor + wetlabsubat_proxy_diatoms[wetlabsubat_proxy_diatoms < 0] = 0 + self.df_r["wetlabsubat_proxy_diatoms"] = wetlabsubat_proxy_diatoms + self.df_r["wetlabsubat_proxy_diatoms"].attrs["comment"] = ( + f"Diatom proxy using proxy_ratio_adinos" + f" = {proxy_ratio_adinos:.4e} and proxy_cal_factor = {proxy_cal_factor:.6f}" + ) + + return fluo, sunsets, sunrises + + def select_nighttime_ubat_raw( + self, + stride: int = 3000, + ) -> tuple[pd.Series, list[datetime], list[datetime]]: + """ + Select nighttime wetlabsubat_digitized_raw_ad_counts data for multiple nights in a mission. + Parallel to select_nighttime_bl_raw() but for LRAUV wetlabsubat data. + Default stride of 3000 gives 10-minute resolution from 5 Hz navigation data. + + Returns: + nighttime_ubat_raw: A pandas Series containing nighttime ubat data. + sunsets: A list of sunset times for each night. + sunrises: A list of sunrise times for each night. + """ + lat_var, lon_var = self._find_lat_lon_variables() + lat = float(self.ds[lat_var].median()) + lon = float(self.ds[lon_var].median()) + self.logger.debug("Getting sun altitudes for nighttime selection") + sun_alts = [] + # Get the time coordinate for the latitude variable + time_coord = self.ds[lat_var].dims[0] + for ts in self.ds[time_coord].to_numpy()[::stride]: + # About 10-minute resolution from 5 Hz navigation data + sun_alts.append( # noqa: PERF401 + get_altitude( + lat, + lon, + datetime.fromtimestamp(ts.astype(int) / 1.0e9, tz=UTC), + ), + ) + + # Find sunset and sunrise - where sun altitude changes sign + sign_changes = np.where(np.diff(np.sign(sun_alts)))[0] + ss_sr_times = self.ds[time_coord].isel({time_coord: sign_changes * stride}).to_numpy() + self.logger.debug("Sunset and sunrise times: %s", ss_sr_times) + + sunsets = [] + sunrises = [] + nighttime_ubat_raw = pd.Series(dtype="float64") + + # Iterate over sunset and sunrise pairs + for i in range(0, len(ss_sr_times) - 1, 2): + sunset = ss_sr_times[i] + pd.to_timedelta(1, "h") # 1 hour past sunset + sunrise = ss_sr_times[i + 1] - pd.to_timedelta(1, "h") # 1 hour before sunrise + sunsets.append(sunset) + sunrises.append(sunrise) + + self.logger.info( + "Extracting wetlabsubat_digitized_raw_ad_counts data " + "between sunset %s and sunrise %s", + sunset, + sunrise, + ) + nighttime_data = ( + self.ds["wetlabsubat_digitized_raw_ad_counts"] + .where( + (self.ds["wetlabsubat_time_60hz"] > sunset) + & (self.ds["wetlabsubat_time_60hz"] < sunrise), + ) + .to_pandas() + .dropna() + ) + # This complication is needed because concat will not like an empty DataFrame + nighttime_ubat_raw = ( + nighttime_ubat_raw.copy() + if nighttime_data.empty + else nighttime_data.copy() + if nighttime_ubat_raw.empty + else pd.concat([nighttime_ubat_raw, nighttime_data]) # if both DataFrames non empty + ) + + if not sunsets or not sunrises: + self.logger.info("No sunset or sunrise found during this mission.") + return nighttime_ubat_raw, sunsets, sunrises + def correct_biolume_proxies( # noqa: C901, PLR0912, PLR0913, PLR0915 self, biolume_fluo: pd.Series, # from add_biolume_proxies @@ -1138,6 +1515,16 @@ def resample_variable( # noqa: PLR0913 biolume_sunrises, depth_threshold, ) + elif instr == "wetlabsubat" and variable == "wetlabsubat_digitized_raw_ad_counts": + # All wetlabsubat proxy variables are computed from wetlabsubat_digitized_raw_ad_counts + # Use default parameters for LRAUV - these may need adjustment in the future + proxy_cal_factor = 0.00470 + proxy_ratio_adinos = 3.9811e13 + self.add_wetlabsubat_proxies( + freq=freq, + proxy_cal_factor=proxy_cal_factor, + proxy_ratio_adinos=proxy_ratio_adinos, + ) else: self.df_o[variable] = self.ds[variable].to_pandas() self.df_o[f"{variable}_mf"] = ( @@ -1372,6 +1759,28 @@ def resample_mission( # noqa: C901, PLR0912, PLR0915, PLR0913 self.resampled_nc[var].attrs["coordinates"] = ( "time depth latitude longitude" ) + elif instr == "wetlabsubat" and variable == "wetlabsubat_digitized_raw_ad_counts": + # resample_variable() creates new proxy variables for LRAUV + # not in the original align.nc file + self.resample_variable( + instr, + variable, + mf_width, + freq, + mission_start, + mission_end, + instrs_to_pad, + depth_threshold, + ) + for var in self.df_r: + if var not in variables: + # save new proxy variable + self.df_r[var].index.rename("time", inplace=True) # noqa: PD002 + self.resampled_nc[var] = self.df_r[var].to_xarray() + self.resampled_nc[var].attrs = self.df_r[var].attrs + self.resampled_nc[var].attrs["coordinates"] = ( + "time depth latitude longitude" + ) elif variable in {"biolume_latitude", "biolume_longitude"}: self.logger.info( "Not saving instrument coordinate variable %s to resampled file", From 48c54363b33348948e2e4b2adfc31d24f70271a9 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 10:36:20 -0800 Subject: [PATCH 120/121] Removed 'concentration_of_colored_dissolved_organic_matter_in_sea_water' from / Group. --- src/data/nc42netcdfs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/data/nc42netcdfs.py b/src/data/nc42netcdfs.py index f3105418..ddb77b8c 100755 --- a/src/data/nc42netcdfs.py +++ b/src/data/nc42netcdfs.py @@ -39,7 +39,6 @@ SCI_PARMS = { "/": [ - {"name": "concentration_of_colored_dissolved_organic_matter_in_sea_water"}, {"name": "longitude"}, {"name": "latitude"}, {"name": "depth"}, From 86c402fc5af9d8415b930b6097fbfcd5ddf58dd1 Mon Sep 17 00:00:00 2001 From: Mike McCann Date: Tue, 2 Dec 2025 10:37:26 -0800 Subject: [PATCH 121/121] Ensure that only the variable 'depth' has that standard_name in the _1S.nc file. --- .vscode/launch.json | 6 ++++-- src/data/resample.py | 10 ++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bb55b3d5..d5d5c0f9 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -192,7 +192,8 @@ //"args": ["--auv_name", "dorado", "--mission", "2023.123.00", "-v", "1"], //"args": ["-v", "2", "--log_file", "brizo/missionlogs/2025/20250916_20250922/20250916T230652/202509162306_202509180305.nc4"] //"args": ["--auv_name", "dorado", "--mission", "2025.316.02", "-v", "1"], - "args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"], + //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4"], + "args": ["-v", "1", "--log_file", "ahi/missionlogs/2025/20250414_20250418/20250414T205440/202504142054_202504150400.nc4"], }, { "name": "5.0 - archive.py", @@ -359,7 +360,8 @@ // Fails with ValueError: different number of dimensions on data and dims: 2 vs 1 for wetlabsubat_digitized_raw_ad_counts variable //"args": ["-v", "1", "--log_file", "pontus/missionlogs/2025/20250604_20250616/20250608T020852/202506080209_202506081934.nc4", "--no_cleanup"] // Full month of June 2025 for Pontus with WetLabsUBAT Group data - "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + //"args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup"] + "args": ["-v", "1", "--auv_name", "pontus", "--start", "20250601T000000", "--end", "20250702T000000", "--noinput", "--num_cores", "1", "--no_cleanup", "--clobber"] }, ] diff --git a/src/data/resample.py b/src/data/resample.py index 1d467b9d..aaf2aa4c 100755 --- a/src/data/resample.py +++ b/src/data/resample.py @@ -117,12 +117,13 @@ def _build_global_metadata(self) -> None: gitcommit = "" iso_now = datetime.now(tz=UTC).isoformat().split(".")[0] + "Z" - # Ensure that only the latitude and longitude variables have - # standard_name attributes equal to "latitude" and "longitude" so that + # Ensure that only the latitude, longitude, and depth variables have + # standard_name attributes equal to "latitude", "longitude", and "depth" so that # the .cf[] accessor works correctly for var in self.resampled_nc.data_vars: - if self.resampled_nc[var].attrs.get("standard_name") in ["latitude", "longitude"]: - if var in {"latitude", "longitude"}: + standard_name = self.resampled_nc[var].attrs.get("standard_name") + if standard_name in ["latitude", "longitude", "depth"]: + if var in {"latitude", "longitude", "depth"}: continue self.logger.info("Removing standard_name attribute from variable %s", var) del self.resampled_nc[var].attrs["standard_name"] @@ -457,6 +458,7 @@ def save_coordinates( self.df_r["longitude"].index.rename("time", inplace=True) # noqa: PD002 self.resampled_nc["longitude"] = self.df_r["longitude"].to_xarray() self.resampled_nc["depth"].attrs = self.ds[f"{instr}_depth"].attrs + self.resampled_nc["depth"].attrs["standard_name"] = "depth" self.resampled_nc["depth"].attrs["comment"] += ( f". {self.ds[f'{instr}_depth'].attrs['comment']}" f" mean sampled at {self.freq} intervals following"